1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines an instruction selector for the Hexagon target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "Hexagon.h" 15 #include "HexagonISelDAGToDAG.h" 16 #include "HexagonISelLowering.h" 17 #include "HexagonMachineFunctionInfo.h" 18 #include "HexagonTargetMachine.h" 19 #include "llvm/CodeGen/FunctionLoweringInfo.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/SelectionDAGISel.h" 22 #include "llvm/IR/Intrinsics.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Debug.h" 25 using namespace llvm; 26 27 #define DEBUG_TYPE "hexagon-isel" 28 29 static 30 cl::opt<bool> 31 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), 32 cl::desc("Rebalance address calculation trees to improve " 33 "instruction selection")); 34 35 // Rebalance only if this allows e.g. combining a GA with an offset or 36 // factoring out a shift. 37 static 38 cl::opt<bool> 39 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), 40 cl::desc("Rebalance address tree only if this allows optimizations")); 41 42 static 43 cl::opt<bool> 44 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, 45 cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced")); 46 47 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden, 48 cl::init(true), cl::desc("Enable checking of SDNode's single-use status")); 49 50 //===----------------------------------------------------------------------===// 51 // Instruction Selector Implementation 52 //===----------------------------------------------------------------------===// 53 54 #define GET_DAGISEL_BODY HexagonDAGToDAGISel 55 #include "HexagonGenDAGISel.inc" 56 57 /// createHexagonISelDag - This pass converts a legalized DAG into a 58 /// Hexagon-specific DAG, ready for instruction scheduling. 59 /// 60 namespace llvm { 61 FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, 62 CodeGenOpt::Level OptLevel) { 63 return new HexagonDAGToDAGISel(TM, OptLevel); 64 } 65 } 66 67 void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { 68 SDValue Chain = LD->getChain(); 69 SDValue Base = LD->getBasePtr(); 70 SDValue Offset = LD->getOffset(); 71 int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); 72 EVT LoadedVT = LD->getMemoryVT(); 73 unsigned Opcode = 0; 74 75 // Check for zero extended loads. Treat any-extend loads as zero extended 76 // loads. 77 ISD::LoadExtType ExtType = LD->getExtensionType(); 78 bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); 79 bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc); 80 81 assert(LoadedVT.isSimple()); 82 switch (LoadedVT.getSimpleVT().SimpleTy) { 83 case MVT::i8: 84 if (IsZeroExt) 85 Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io; 86 else 87 Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io; 88 break; 89 case MVT::i16: 90 if (IsZeroExt) 91 Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io; 92 else 93 Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io; 94 break; 95 case MVT::i32: 96 case MVT::f32: 97 case MVT::v2i16: 98 case MVT::v4i8: 99 Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io; 100 break; 101 case MVT::i64: 102 case MVT::f64: 103 case MVT::v2i32: 104 case MVT::v4i16: 105 case MVT::v8i8: 106 Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io; 107 break; 108 case MVT::v64i8: 109 case MVT::v32i16: 110 case MVT::v16i32: 111 case MVT::v8i64: 112 case MVT::v128i8: 113 case MVT::v64i16: 114 case MVT::v32i32: 115 case MVT::v16i64: 116 if (isAlignedMemNode(LD)) { 117 if (LD->isNonTemporal()) 118 Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai; 119 else 120 Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; 121 } else { 122 Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai; 123 } 124 break; 125 default: 126 llvm_unreachable("Unexpected memory type in indexed load"); 127 } 128 129 SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32); 130 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 131 MemOp[0] = LD->getMemOperand(); 132 133 auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl) 134 -> MachineSDNode* { 135 if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) { 136 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); 137 return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64, 138 Zero, SDValue(N, 0)); 139 } 140 if (ExtType == ISD::SEXTLOAD) 141 return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, 142 SDValue(N, 0)); 143 return N; 144 }; 145 146 // Loaded value Next address Chain 147 SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) }; 148 SDValue To[3]; 149 150 EVT ValueVT = LD->getValueType(0); 151 if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) { 152 // A load extending to i64 will actually produce i32, which will then 153 // need to be extended to i64. 154 assert(LoadedVT.getSizeInBits() <= 32); 155 ValueVT = MVT::i32; 156 } 157 158 if (IsValidInc) { 159 MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, 160 MVT::i32, MVT::Other, Base, 161 IncV, Chain); 162 L->setMemRefs(MemOp, MemOp+1); 163 To[1] = SDValue(L, 1); // Next address. 164 To[2] = SDValue(L, 2); // Chain. 165 // Handle special case for extension to i64. 166 if (LD->getValueType(0) == MVT::i64) 167 L = getExt64(L, dl); 168 To[0] = SDValue(L, 0); // Loaded (extended) value. 169 } else { 170 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); 171 MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other, 172 Base, Zero, Chain); 173 L->setMemRefs(MemOp, MemOp+1); 174 To[2] = SDValue(L, 1); // Chain. 175 MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, 176 Base, IncV); 177 To[1] = SDValue(A, 0); // Next address. 178 // Handle special case for extension to i64. 179 if (LD->getValueType(0) == MVT::i64) 180 L = getExt64(L, dl); 181 To[0] = SDValue(L, 0); // Loaded (extended) value. 182 } 183 ReplaceUses(From, To, 3); 184 CurDAG->RemoveDeadNode(LD); 185 } 186 187 MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) { 188 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN) 189 return nullptr; 190 191 SDLoc dl(IntN); 192 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); 193 194 static std::map<unsigned,unsigned> LoadPciMap = { 195 { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci }, 196 { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci }, 197 { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci }, 198 { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci }, 199 { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci }, 200 { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci }, 201 }; 202 auto FLC = LoadPciMap.find(IntNo); 203 if (FLC != LoadPciMap.end()) { 204 EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32; 205 EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; 206 // Operands: { Base, Increment, Modifier, Chain } 207 auto Inc = cast<ConstantSDNode>(IntN->getOperand(5)); 208 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32); 209 MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys, 210 { IntN->getOperand(2), I, IntN->getOperand(4), 211 IntN->getOperand(0) }); 212 return Res; 213 } 214 215 return nullptr; 216 } 217 218 SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, 219 SDNode *IntN) { 220 // The "LoadN" is just a machine load instruction. The intrinsic also 221 // involves storing it. Generate an appropriate store to the location 222 // given in the intrinsic's operand(3). 223 uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags; 224 unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) & 225 HexagonII::MemAccesSizeMask; 226 unsigned Size = 1U << (SizeBits-1); 227 228 SDLoc dl(IntN); 229 MachinePointerInfo PI; 230 SDValue TS; 231 SDValue Loc = IntN->getOperand(3); 232 233 if (Size >= 4) 234 TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI, 235 Size); 236 else 237 TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, 238 PI, MVT::getIntegerVT(Size * 8), Size); 239 240 SDNode *StoreN; 241 { 242 HandleSDNode Handle(TS); 243 SelectStore(TS.getNode()); 244 StoreN = Handle.getValue().getNode(); 245 } 246 247 // Load's results are { Loaded value, Updated pointer, Chain } 248 ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1)); 249 ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0)); 250 return StoreN; 251 } 252 253 bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) { 254 // The intrinsics for load circ/brev perform two operations: 255 // 1. Load a value V from the specified location, using the addressing 256 // mode corresponding to the intrinsic. 257 // 2. Store V into a specified location. This location is typically a 258 // local, temporary object. 259 // In many cases, the program using these intrinsics will immediately 260 // load V again from the local object. In those cases, when certain 261 // conditions are met, the last load can be removed. 262 // This function identifies and optimizes this pattern. If the pattern 263 // cannot be optimized, it returns nullptr, which will cause the load 264 // to be selected separately from the intrinsic (which will be handled 265 // in SelectIntrinsicWChain). 266 267 SDValue Ch = N->getOperand(0); 268 SDValue Loc = N->getOperand(1); 269 270 // Assume that the load and the intrinsic are connected directly with a 271 // chain: 272 // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C 273 // t2: i32,ch = load t1:1, Loc, ... 274 SDNode *C = Ch.getNode(); 275 276 if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN) 277 return false; 278 279 // The second load can only be eliminated if its extension type matches 280 // that of the load instruction corresponding to the intrinsic. The user 281 // can provide an address of an unsigned variable to store the result of 282 // a sign-extending intrinsic into (or the other way around). 283 ISD::LoadExtType IntExt; 284 switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) { 285 case Intrinsic::hexagon_circ_ldub: 286 case Intrinsic::hexagon_circ_lduh: 287 IntExt = ISD::ZEXTLOAD; 288 break; 289 case Intrinsic::hexagon_circ_ldw: 290 case Intrinsic::hexagon_circ_ldd: 291 IntExt = ISD::NON_EXTLOAD; 292 break; 293 default: 294 IntExt = ISD::SEXTLOAD; 295 break; 296 } 297 if (N->getExtensionType() != IntExt) 298 return false; 299 300 // Make sure the target location for the loaded value in the load intrinsic 301 // is the location from which LD (or N) is loading. 302 if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode()) 303 return false; 304 305 if (MachineSDNode *L = LoadInstrForLoadIntrinsic(C)) { 306 SDNode *S = StoreInstrForLoadIntrinsic(L, C); 307 SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) }; 308 SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) }; 309 ReplaceUses(F, T, array_lengthof(T)); 310 // This transformation will leave the intrinsic dead. If it remains in 311 // the DAG, the selection code will see it again, but without the load, 312 // and it will generate a store that is normally required for it. 313 CurDAG->RemoveDeadNode(C); 314 return true; 315 } 316 return false; 317 } 318 319 // Convert the bit-reverse load intrinsic to appropriate target instruction. 320 bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) { 321 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN) 322 return false; 323 324 const SDLoc &dl(IntN); 325 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); 326 327 static const std::map<unsigned, unsigned> LoadBrevMap = { 328 { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr }, 329 { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr }, 330 { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr }, 331 { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr }, 332 { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr }, 333 { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr } 334 }; 335 auto FLI = LoadBrevMap.find(IntNo); 336 if (FLI != LoadBrevMap.end()) { 337 EVT ValTy = 338 (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32; 339 EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; 340 // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr, 341 // modifier}. 342 // Operands of target instruction: { Base, Modifier, Chain }. 343 MachineSDNode *Res = CurDAG->getMachineNode( 344 FLI->second, dl, RTys, 345 {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)}); 346 347 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 348 MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand(); 349 Res->setMemRefs(MemOp, MemOp + 1); 350 351 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0)); 352 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1)); 353 ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2)); 354 CurDAG->RemoveDeadNode(IntN); 355 return true; 356 } 357 return false; 358 } 359 360 /// Generate a machine instruction node for the new circlar buffer intrinsics. 361 /// The new versions use a CSx register instead of the K field. 362 bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) { 363 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN) 364 return false; 365 366 SDLoc DL(IntN); 367 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); 368 SmallVector<SDValue, 7> Ops; 369 370 static std::map<unsigned,unsigned> LoadNPcMap = { 371 { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci }, 372 { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci }, 373 { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci }, 374 { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci }, 375 { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci }, 376 { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci }, 377 { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr }, 378 { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr }, 379 { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr }, 380 { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr }, 381 { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr }, 382 { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr } 383 }; 384 auto FLI = LoadNPcMap.find (IntNo); 385 if (FLI != LoadNPcMap.end()) { 386 EVT ValTy = MVT::i32; 387 if (IntNo == Intrinsic::hexagon_L2_loadrd_pci || 388 IntNo == Intrinsic::hexagon_L2_loadrd_pcr) 389 ValTy = MVT::i64; 390 EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; 391 // Handle load.*_pci case which has 6 operands. 392 if (IntN->getNumOperands() == 6) { 393 auto Inc = cast<ConstantSDNode>(IntN->getOperand(3)); 394 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32); 395 // Operands: { Base, Increment, Modifier, Start, Chain }. 396 Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5), 397 IntN->getOperand(0) }; 398 } else 399 // Handle load.*_pcr case which has 5 operands. 400 // Operands: { Base, Modifier, Start, Chain }. 401 Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4), 402 IntN->getOperand(0) }; 403 MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops); 404 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0)); 405 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1)); 406 ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2)); 407 CurDAG->RemoveDeadNode(IntN); 408 return true; 409 } 410 411 static std::map<unsigned,unsigned> StoreNPcMap = { 412 { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci }, 413 { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci }, 414 { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci }, 415 { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci }, 416 { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci }, 417 { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr }, 418 { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr }, 419 { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr }, 420 { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr }, 421 { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr } 422 }; 423 auto FSI = StoreNPcMap.find (IntNo); 424 if (FSI != StoreNPcMap.end()) { 425 EVT RTys[] = { MVT::i32, MVT::Other }; 426 // Handle store.*_pci case which has 7 operands. 427 if (IntN->getNumOperands() == 7) { 428 auto Inc = cast<ConstantSDNode>(IntN->getOperand(3)); 429 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32); 430 // Operands: { Base, Increment, Modifier, Value, Start, Chain }. 431 Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5), 432 IntN->getOperand(6), IntN->getOperand(0) }; 433 } else 434 // Handle store.*_pcr case which has 6 operands. 435 // Operands: { Base, Modifier, Value, Start, Chain }. 436 Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4), 437 IntN->getOperand(5), IntN->getOperand(0) }; 438 MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops); 439 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0)); 440 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1)); 441 CurDAG->RemoveDeadNode(IntN); 442 return true; 443 } 444 445 return false; 446 } 447 448 void HexagonDAGToDAGISel::SelectLoad(SDNode *N) { 449 SDLoc dl(N); 450 LoadSDNode *LD = cast<LoadSDNode>(N); 451 452 // Handle indexed loads. 453 ISD::MemIndexedMode AM = LD->getAddressingMode(); 454 if (AM != ISD::UNINDEXED) { 455 SelectIndexedLoad(LD, dl); 456 return; 457 } 458 459 // Handle patterns using circ/brev load intrinsics. 460 if (tryLoadOfLoadIntrinsic(LD)) 461 return; 462 463 SelectCode(LD); 464 } 465 466 void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { 467 SDValue Chain = ST->getChain(); 468 SDValue Base = ST->getBasePtr(); 469 SDValue Offset = ST->getOffset(); 470 SDValue Value = ST->getValue(); 471 // Get the constant value. 472 int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); 473 EVT StoredVT = ST->getMemoryVT(); 474 EVT ValueVT = Value.getValueType(); 475 476 bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc); 477 unsigned Opcode = 0; 478 479 assert(StoredVT.isSimple()); 480 switch (StoredVT.getSimpleVT().SimpleTy) { 481 case MVT::i8: 482 Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io; 483 break; 484 case MVT::i16: 485 Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io; 486 break; 487 case MVT::i32: 488 case MVT::f32: 489 case MVT::v2i16: 490 case MVT::v4i8: 491 Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io; 492 break; 493 case MVT::i64: 494 case MVT::f64: 495 case MVT::v2i32: 496 case MVT::v4i16: 497 case MVT::v8i8: 498 Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io; 499 break; 500 case MVT::v64i8: 501 case MVT::v32i16: 502 case MVT::v16i32: 503 case MVT::v8i64: 504 case MVT::v128i8: 505 case MVT::v64i16: 506 case MVT::v32i32: 507 case MVT::v16i64: 508 if (isAlignedMemNode(ST)) { 509 if (ST->isNonTemporal()) 510 Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai; 511 else 512 Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; 513 } else { 514 Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai; 515 } 516 break; 517 default: 518 llvm_unreachable("Unexpected memory type in indexed store"); 519 } 520 521 if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { 522 assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); 523 Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, 524 dl, MVT::i32, Value); 525 } 526 527 SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32); 528 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 529 MemOp[0] = ST->getMemOperand(); 530 531 // Next address Chain 532 SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) }; 533 SDValue To[2]; 534 535 if (IsValidInc) { 536 // Build post increment store. 537 SDValue Ops[] = { Base, IncV, Value, Chain }; 538 MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, 539 Ops); 540 S->setMemRefs(MemOp, MemOp + 1); 541 To[0] = SDValue(S, 0); 542 To[1] = SDValue(S, 1); 543 } else { 544 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); 545 SDValue Ops[] = { Base, Zero, Value, Chain }; 546 MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 547 S->setMemRefs(MemOp, MemOp + 1); 548 To[1] = SDValue(S, 0); 549 MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, 550 Base, IncV); 551 To[0] = SDValue(A, 0); 552 } 553 554 ReplaceUses(From, To, 2); 555 CurDAG->RemoveDeadNode(ST); 556 } 557 558 void HexagonDAGToDAGISel::SelectStore(SDNode *N) { 559 SDLoc dl(N); 560 StoreSDNode *ST = cast<StoreSDNode>(N); 561 562 // Handle indexed stores. 563 ISD::MemIndexedMode AM = ST->getAddressingMode(); 564 if (AM != ISD::UNINDEXED) { 565 SelectIndexedStore(ST, dl); 566 return; 567 } 568 569 SelectCode(ST); 570 } 571 572 void HexagonDAGToDAGISel::SelectSHL(SDNode *N) { 573 SDLoc dl(N); 574 SDValue Shl_0 = N->getOperand(0); 575 SDValue Shl_1 = N->getOperand(1); 576 577 auto Default = [this,N] () -> void { SelectCode(N); }; 578 579 if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant) 580 return Default(); 581 582 // RHS is const. 583 int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue(); 584 585 if (Shl_0.getOpcode() == ISD::MUL) { 586 SDValue Mul_0 = Shl_0.getOperand(0); // Val 587 SDValue Mul_1 = Shl_0.getOperand(1); // Const 588 // RHS of mul is const. 589 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) { 590 int32_t ValConst = C->getSExtValue() << ShlConst; 591 if (isInt<9>(ValConst)) { 592 SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32); 593 SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, 594 MVT::i32, Mul_0, Val); 595 ReplaceNode(N, Result); 596 return; 597 } 598 } 599 return Default(); 600 } 601 602 if (Shl_0.getOpcode() == ISD::SUB) { 603 SDValue Sub_0 = Shl_0.getOperand(0); // Const 0 604 SDValue Sub_1 = Shl_0.getOperand(1); // Val 605 if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) { 606 if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL) 607 return Default(); 608 SDValue Shl2_0 = Sub_1.getOperand(0); // Val 609 SDValue Shl2_1 = Sub_1.getOperand(1); // Const 610 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) { 611 int32_t ValConst = 1 << (ShlConst + C2->getSExtValue()); 612 if (isInt<9>(-ValConst)) { 613 SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32); 614 SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, 615 MVT::i32, Shl2_0, Val); 616 ReplaceNode(N, Result); 617 return; 618 } 619 } 620 } 621 } 622 623 return Default(); 624 } 625 626 // 627 // Handling intrinsics for circular load and bitreverse load. 628 // 629 void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { 630 if (MachineSDNode *L = LoadInstrForLoadIntrinsic(N)) { 631 StoreInstrForLoadIntrinsic(L, N); 632 CurDAG->RemoveDeadNode(N); 633 return; 634 } 635 636 // Handle bit-reverse load intrinsics. 637 if (SelectBrevLdIntrinsic(N)) 638 return; 639 640 if (SelectNewCircIntrinsic(N)) 641 return; 642 643 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 644 if (IntNo == Intrinsic::hexagon_V6_vgathermw || 645 IntNo == Intrinsic::hexagon_V6_vgathermw_128B || 646 IntNo == Intrinsic::hexagon_V6_vgathermh || 647 IntNo == Intrinsic::hexagon_V6_vgathermh_128B || 648 IntNo == Intrinsic::hexagon_V6_vgathermhw || 649 IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) { 650 SelectV65Gather(N); 651 return; 652 } 653 if (IntNo == Intrinsic::hexagon_V6_vgathermwq || 654 IntNo == Intrinsic::hexagon_V6_vgathermwq_128B || 655 IntNo == Intrinsic::hexagon_V6_vgathermhq || 656 IntNo == Intrinsic::hexagon_V6_vgathermhq_128B || 657 IntNo == Intrinsic::hexagon_V6_vgathermhwq || 658 IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) { 659 SelectV65GatherPred(N); 660 return; 661 } 662 663 SelectCode(N); 664 } 665 666 void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { 667 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 668 unsigned Bits; 669 switch (IID) { 670 case Intrinsic::hexagon_S2_vsplatrb: 671 Bits = 8; 672 break; 673 case Intrinsic::hexagon_S2_vsplatrh: 674 Bits = 16; 675 break; 676 case Intrinsic::hexagon_V6_vaddcarry: 677 case Intrinsic::hexagon_V6_vaddcarry_128B: 678 case Intrinsic::hexagon_V6_vsubcarry: 679 case Intrinsic::hexagon_V6_vsubcarry_128B: 680 SelectHVXDualOutput(N); 681 return; 682 default: 683 SelectCode(N); 684 return; 685 } 686 687 SDValue V = N->getOperand(1); 688 SDValue U; 689 if (keepsLowBits(V, Bits, U)) { 690 SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), 691 N->getOperand(0), U); 692 ReplaceNode(N, R.getNode()); 693 SelectCode(R.getNode()); 694 return; 695 } 696 SelectCode(N); 697 } 698 699 // 700 // Map floating point constant values. 701 // 702 void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { 703 SDLoc dl(N); 704 ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); 705 APInt A = CN->getValueAPF().bitcastToAPInt(); 706 if (N->getValueType(0) == MVT::f32) { 707 SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32); 708 ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V)); 709 return; 710 } 711 if (N->getValueType(0) == MVT::f64) { 712 SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64); 713 ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V)); 714 return; 715 } 716 717 SelectCode(N); 718 } 719 720 // 721 // Map boolean values. 722 // 723 void HexagonDAGToDAGISel::SelectConstant(SDNode *N) { 724 if (N->getValueType(0) == MVT::i1) { 725 assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1)); 726 unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0) 727 ? Hexagon::PS_true 728 : Hexagon::PS_false; 729 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1)); 730 return; 731 } 732 733 SelectCode(N); 734 } 735 736 void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { 737 MachineFrameInfo &MFI = MF->getFrameInfo(); 738 const HexagonFrameLowering *HFI = HST->getFrameLowering(); 739 int FX = cast<FrameIndexSDNode>(N)->getIndex(); 740 unsigned StkA = HFI->getStackAlignment(); 741 unsigned MaxA = MFI.getMaxAlignment(); 742 SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); 743 SDLoc DL(N); 744 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 745 SDNode *R = nullptr; 746 747 // Use PS_fi when: 748 // - the object is fixed, or 749 // - there are no objects with higher-than-default alignment, or 750 // - there are no dynamically allocated objects. 751 // Otherwise, use PS_fia. 752 if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) { 753 R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero); 754 } else { 755 auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>(); 756 unsigned AR = HMFI.getStackAlignBaseVReg(); 757 SDValue CH = CurDAG->getEntryNode(); 758 SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero }; 759 R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops); 760 } 761 762 ReplaceNode(N, R); 763 } 764 765 void HexagonDAGToDAGISel::SelectAddSubCarry(SDNode *N) { 766 unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c 767 : Hexagon::A4_subp_c; 768 SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(), 769 { N->getOperand(0), N->getOperand(1), 770 N->getOperand(2) }); 771 ReplaceNode(N, C); 772 } 773 774 void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) { 775 MVT ResTy = N->getValueType(0).getSimpleVT(); 776 if (HST->isHVXVectorType(ResTy, true)) 777 return SelectHvxVAlign(N); 778 779 const SDLoc &dl(N); 780 unsigned VecLen = ResTy.getSizeInBits(); 781 if (VecLen == 32) { 782 SDValue Ops[] = { 783 CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32), 784 N->getOperand(0), 785 CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32), 786 N->getOperand(1), 787 CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32) 788 }; 789 SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, 790 MVT::i64, Ops); 791 792 // Shift right by "(Addr & 0x3) * 8" bytes. 793 SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32); 794 SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32); 795 SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32, 796 M0, N->getOperand(2), M1); 797 SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64, 798 SDValue(R, 0), SDValue(C, 0)); 799 SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy, 800 SDValue(S, 0)); 801 ReplaceNode(N, E.getNode()); 802 } else { 803 assert(VecLen == 64); 804 SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1, 805 N->getOperand(2)); 806 SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy, 807 N->getOperand(0), N->getOperand(1), 808 SDValue(Pu,0)); 809 ReplaceNode(N, VA); 810 } 811 } 812 813 void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) { 814 const SDLoc &dl(N); 815 SDValue A = N->getOperand(1); 816 int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue(); 817 assert(isPowerOf2_32(-Mask)); 818 819 SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32); 820 SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32, 821 N->getOperand(0), M); 822 ReplaceNode(N, AA); 823 } 824 825 // Handle these nodes here to avoid having to write patterns for all 826 // combinations of input/output types. In all cases, the resulting 827 // instruction is the same. 828 void HexagonDAGToDAGISel::SelectTypecast(SDNode *N) { 829 SDValue Op = N->getOperand(0); 830 MVT OpTy = Op.getValueType().getSimpleVT(); 831 SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(), 832 CurDAG->getVTList(OpTy), {Op}); 833 ReplaceNode(T, Op.getNode()); 834 } 835 836 void HexagonDAGToDAGISel::SelectP2D(SDNode *N) { 837 MVT ResTy = N->getValueType(0).getSimpleVT(); 838 SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy, 839 N->getOperand(0)); 840 ReplaceNode(N, T); 841 } 842 843 void HexagonDAGToDAGISel::SelectD2P(SDNode *N) { 844 const SDLoc &dl(N); 845 MVT ResTy = N->getValueType(0).getSimpleVT(); 846 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); 847 SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy, 848 N->getOperand(0), Zero); 849 ReplaceNode(N, T); 850 } 851 852 void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) { 853 const SDLoc &dl(N); 854 MVT ResTy = N->getValueType(0).getSimpleVT(); 855 856 SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32); 857 SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C); 858 SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy, 859 N->getOperand(0), SDValue(R,0)); 860 ReplaceNode(N, T); 861 } 862 863 void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) { 864 const SDLoc &dl(N); 865 MVT ResTy = N->getValueType(0).getSimpleVT(); 866 867 SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32); 868 SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C); 869 SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy, 870 N->getOperand(0), SDValue(R,0)); 871 ReplaceNode(N, T); 872 } 873 874 void HexagonDAGToDAGISel::Select(SDNode *N) { 875 if (N->isMachineOpcode()) 876 return N->setNodeId(-1); // Already selected. 877 878 switch (N->getOpcode()) { 879 case ISD::Constant: return SelectConstant(N); 880 case ISD::ConstantFP: return SelectConstantFP(N); 881 case ISD::FrameIndex: return SelectFrameIndex(N); 882 case ISD::SHL: return SelectSHL(N); 883 case ISD::LOAD: return SelectLoad(N); 884 case ISD::STORE: return SelectStore(N); 885 case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N); 886 case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); 887 888 case HexagonISD::ADDC: 889 case HexagonISD::SUBC: return SelectAddSubCarry(N); 890 case HexagonISD::VALIGN: return SelectVAlign(N); 891 case HexagonISD::VALIGNADDR: return SelectVAlignAddr(N); 892 case HexagonISD::TYPECAST: return SelectTypecast(N); 893 case HexagonISD::P2D: return SelectP2D(N); 894 case HexagonISD::D2P: return SelectD2P(N); 895 case HexagonISD::Q2V: return SelectQ2V(N); 896 case HexagonISD::V2Q: return SelectV2Q(N); 897 } 898 899 if (HST->useHVXOps()) { 900 switch (N->getOpcode()) { 901 case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N); 902 case HexagonISD::VROR: return SelectHvxRor(N); 903 } 904 } 905 906 SelectCode(N); 907 } 908 909 bool HexagonDAGToDAGISel:: 910 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 911 std::vector<SDValue> &OutOps) { 912 SDValue Inp = Op, Res; 913 914 switch (ConstraintID) { 915 default: 916 return true; 917 case InlineAsm::Constraint_i: 918 case InlineAsm::Constraint_o: // Offsetable. 919 case InlineAsm::Constraint_v: // Not offsetable. 920 case InlineAsm::Constraint_m: // Memory. 921 if (SelectAddrFI(Inp, Res)) 922 OutOps.push_back(Res); 923 else 924 OutOps.push_back(Inp); 925 break; 926 } 927 928 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); 929 return false; 930 } 931 932 933 static bool isMemOPCandidate(SDNode *I, SDNode *U) { 934 // I is an operand of U. Check if U is an arithmetic (binary) operation 935 // usable in a memop, where the other operand is a loaded value, and the 936 // result of U is stored in the same location. 937 938 if (!U->hasOneUse()) 939 return false; 940 unsigned Opc = U->getOpcode(); 941 switch (Opc) { 942 case ISD::ADD: 943 case ISD::SUB: 944 case ISD::AND: 945 case ISD::OR: 946 break; 947 default: 948 return false; 949 } 950 951 SDValue S0 = U->getOperand(0); 952 SDValue S1 = U->getOperand(1); 953 SDValue SY = (S0.getNode() == I) ? S1 : S0; 954 955 SDNode *UUse = *U->use_begin(); 956 if (UUse->getNumValues() != 1) 957 return false; 958 959 // Check if one of the inputs to U is a load instruction and the output 960 // is used by a store instruction. If so and they also have the same 961 // base pointer, then don't preoprocess this node sequence as it 962 // can be matched to a memop. 963 SDNode *SYNode = SY.getNode(); 964 if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) { 965 SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr(); 966 SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr(); 967 if (LDBasePtr == STBasePtr) 968 return true; 969 } 970 return false; 971 } 972 973 974 // Transform: (or (select c x 0) z) -> (select c (or x z) z) 975 // (or (select c 0 y) z) -> (select c z (or y z)) 976 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) { 977 SelectionDAG &DAG = *CurDAG; 978 979 for (auto I : Nodes) { 980 if (I->getOpcode() != ISD::OR) 981 continue; 982 983 auto IsZero = [] (const SDValue &V) -> bool { 984 if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode())) 985 return SC->isNullValue(); 986 return false; 987 }; 988 auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { 989 if (Op.getOpcode() != ISD::SELECT) 990 return false; 991 return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); 992 }; 993 994 SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); 995 EVT VT = I->getValueType(0); 996 bool SelN0 = IsSelect0(N0); 997 SDValue SOp = SelN0 ? N0 : N1; 998 SDValue VOp = SelN0 ? N1 : N0; 999 1000 if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) { 1001 SDValue SC = SOp.getOperand(0); 1002 SDValue SX = SOp.getOperand(1); 1003 SDValue SY = SOp.getOperand(2); 1004 SDLoc DLS = SOp; 1005 if (IsZero(SY)) { 1006 SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp); 1007 SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp); 1008 DAG.ReplaceAllUsesWith(I, NewSel.getNode()); 1009 } else if (IsZero(SX)) { 1010 SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp); 1011 SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr); 1012 DAG.ReplaceAllUsesWith(I, NewSel.getNode()); 1013 } 1014 } 1015 } 1016 } 1017 1018 // Transform: (store ch val (add x (add (shl y c) e))) 1019 // to: (store ch val (add x (shl (add y d) c))), 1020 // where e = (shl d c) for some integer d. 1021 // The purpose of this is to enable generation of loads/stores with 1022 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift 1023 // value c must be 0, 1 or 2. 1024 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) { 1025 SelectionDAG &DAG = *CurDAG; 1026 1027 for (auto I : Nodes) { 1028 if (I->getOpcode() != ISD::STORE) 1029 continue; 1030 1031 // I matched: (store ch val Off) 1032 SDValue Off = I->getOperand(2); 1033 // Off needs to match: (add x (add (shl y c) (shl d c)))) 1034 if (Off.getOpcode() != ISD::ADD) 1035 continue; 1036 // Off matched: (add x T0) 1037 SDValue T0 = Off.getOperand(1); 1038 // T0 needs to match: (add T1 T2): 1039 if (T0.getOpcode() != ISD::ADD) 1040 continue; 1041 // T0 matched: (add T1 T2) 1042 SDValue T1 = T0.getOperand(0); 1043 SDValue T2 = T0.getOperand(1); 1044 // T1 needs to match: (shl y c) 1045 if (T1.getOpcode() != ISD::SHL) 1046 continue; 1047 SDValue C = T1.getOperand(1); 1048 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode()); 1049 if (CN == nullptr) 1050 continue; 1051 unsigned CV = CN->getZExtValue(); 1052 if (CV > 2) 1053 continue; 1054 // T2 needs to match e, where e = (shl d c) for some d. 1055 ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode()); 1056 if (EN == nullptr) 1057 continue; 1058 unsigned EV = EN->getZExtValue(); 1059 if (EV % (1 << CV) != 0) 1060 continue; 1061 unsigned DV = EV / (1 << CV); 1062 1063 // Replace T0 with: (shl (add y d) c) 1064 SDLoc DL = SDLoc(I); 1065 EVT VT = T0.getValueType(); 1066 SDValue D = DAG.getConstant(DV, DL, VT); 1067 // NewAdd = (add y d) 1068 SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D); 1069 // NewShl = (shl NewAdd c) 1070 SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C); 1071 ReplaceNode(T0.getNode(), NewShl.getNode()); 1072 } 1073 } 1074 1075 // Transform: (load ch (add x (and (srl y c) Mask))) 1076 // to: (load ch (add x (shl (srl y d) d-c))) 1077 // where 1078 // Mask = 00..0 111..1 0.0 1079 // | | +-- d-c 0s, and d-c is 0, 1 or 2. 1080 // | +-------- 1s 1081 // +-------------- at most c 0s 1082 // Motivating example: 1083 // DAG combiner optimizes (add x (shl (srl y 5) 2)) 1084 // to (add x (and (srl y 3) 1FFFFFFC)) 1085 // which results in a constant-extended and(##...,lsr). This transformation 1086 // undoes this simplification for cases where the shl can be folded into 1087 // an addressing mode. 1088 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) { 1089 SelectionDAG &DAG = *CurDAG; 1090 1091 for (SDNode *N : Nodes) { 1092 unsigned Opc = N->getOpcode(); 1093 if (Opc != ISD::LOAD && Opc != ISD::STORE) 1094 continue; 1095 SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2); 1096 // Addr must match: (add x T0) 1097 if (Addr.getOpcode() != ISD::ADD) 1098 continue; 1099 SDValue T0 = Addr.getOperand(1); 1100 // T0 must match: (and T1 Mask) 1101 if (T0.getOpcode() != ISD::AND) 1102 continue; 1103 1104 // We have an AND. 1105 // 1106 // Check the first operand. It must be: (srl y c). 1107 SDValue S = T0.getOperand(0); 1108 if (S.getOpcode() != ISD::SRL) 1109 continue; 1110 ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode()); 1111 if (SN == nullptr) 1112 continue; 1113 if (SN->getAPIntValue().getBitWidth() != 32) 1114 continue; 1115 uint32_t CV = SN->getZExtValue(); 1116 1117 // Check the second operand: the supposed mask. 1118 ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode()); 1119 if (MN == nullptr) 1120 continue; 1121 if (MN->getAPIntValue().getBitWidth() != 32) 1122 continue; 1123 uint32_t Mask = MN->getZExtValue(); 1124 // Examine the mask. 1125 uint32_t TZ = countTrailingZeros(Mask); 1126 uint32_t M1 = countTrailingOnes(Mask >> TZ); 1127 uint32_t LZ = countLeadingZeros(Mask); 1128 // Trailing zeros + middle ones + leading zeros must equal the width. 1129 if (TZ + M1 + LZ != 32) 1130 continue; 1131 // The number of trailing zeros will be encoded in the addressing mode. 1132 if (TZ > 2) 1133 continue; 1134 // The number of leading zeros must be at most c. 1135 if (LZ > CV) 1136 continue; 1137 1138 // All looks good. 1139 SDValue Y = S.getOperand(0); 1140 EVT VT = Addr.getValueType(); 1141 SDLoc dl(S); 1142 // TZ = D-C, so D = TZ+C. 1143 SDValue D = DAG.getConstant(TZ+CV, dl, VT); 1144 SDValue DC = DAG.getConstant(TZ, dl, VT); 1145 SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D); 1146 SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC); 1147 ReplaceNode(T0.getNode(), NewShl.getNode()); 1148 } 1149 } 1150 1151 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...) 1152 // (op ... 1 ...)) 1153 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) { 1154 SelectionDAG &DAG = *CurDAG; 1155 1156 for (SDNode *N : Nodes) { 1157 unsigned Opc = N->getOpcode(); 1158 if (Opc != ISD::ZERO_EXTEND) 1159 continue; 1160 SDValue OpI1 = N->getOperand(0); 1161 EVT OpVT = OpI1.getValueType(); 1162 if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1) 1163 continue; 1164 for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) { 1165 SDNode *U = *I; 1166 if (U->getNumValues() != 1) 1167 continue; 1168 EVT UVT = U->getValueType(0); 1169 if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1) 1170 continue; 1171 if (isMemOPCandidate(N, U)) 1172 continue; 1173 1174 // Potentially simplifiable operation. 1175 unsigned I1N = I.getOperandNo(); 1176 SmallVector<SDValue,2> Ops(U->getNumOperands()); 1177 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) 1178 Ops[i] = U->getOperand(i); 1179 EVT BVT = Ops[I1N].getValueType(); 1180 1181 SDLoc dl(U); 1182 SDValue C0 = DAG.getConstant(0, dl, BVT); 1183 SDValue C1 = DAG.getConstant(1, dl, BVT); 1184 SDValue If0, If1; 1185 1186 if (isa<MachineSDNode>(U)) { 1187 unsigned UseOpc = U->getMachineOpcode(); 1188 Ops[I1N] = C0; 1189 If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0); 1190 Ops[I1N] = C1; 1191 If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0); 1192 } else { 1193 unsigned UseOpc = U->getOpcode(); 1194 Ops[I1N] = C0; 1195 If0 = DAG.getNode(UseOpc, dl, UVT, Ops); 1196 Ops[I1N] = C1; 1197 If1 = DAG.getNode(UseOpc, dl, UVT, Ops); 1198 } 1199 SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0); 1200 DAG.ReplaceAllUsesWith(U, Sel.getNode()); 1201 } 1202 } 1203 } 1204 1205 void HexagonDAGToDAGISel::PreprocessISelDAG() { 1206 // Repack all nodes before calling each preprocessing function, 1207 // because each of them can modify the set of nodes. 1208 auto getNodes = [this] () -> std::vector<SDNode*> { 1209 std::vector<SDNode*> T; 1210 T.reserve(CurDAG->allnodes_size()); 1211 for (SDNode &N : CurDAG->allnodes()) 1212 T.push_back(&N); 1213 return T; 1214 }; 1215 1216 // Transform: (or (select c x 0) z) -> (select c (or x z) z) 1217 // (or (select c 0 y) z) -> (select c z (or y z)) 1218 ppSimplifyOrSelect0(getNodes()); 1219 1220 // Transform: (store ch val (add x (add (shl y c) e))) 1221 // to: (store ch val (add x (shl (add y d) c))), 1222 // where e = (shl d c) for some integer d. 1223 // The purpose of this is to enable generation of loads/stores with 1224 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift 1225 // value c must be 0, 1 or 2. 1226 ppAddrReorderAddShl(getNodes()); 1227 1228 // Transform: (load ch (add x (and (srl y c) Mask))) 1229 // to: (load ch (add x (shl (srl y d) d-c))) 1230 // where 1231 // Mask = 00..0 111..1 0.0 1232 // | | +-- d-c 0s, and d-c is 0, 1 or 2. 1233 // | +-------- 1s 1234 // +-------------- at most c 0s 1235 // Motivating example: 1236 // DAG combiner optimizes (add x (shl (srl y 5) 2)) 1237 // to (add x (and (srl y 3) 1FFFFFFC)) 1238 // which results in a constant-extended and(##...,lsr). This transformation 1239 // undoes this simplification for cases where the shl can be folded into 1240 // an addressing mode. 1241 ppAddrRewriteAndSrl(getNodes()); 1242 1243 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...) 1244 // (op ... 1 ...)) 1245 ppHoistZextI1(getNodes()); 1246 1247 DEBUG_WITH_TYPE("isel", { 1248 dbgs() << "Preprocessed (Hexagon) selection DAG:"; 1249 CurDAG->dump(); 1250 }); 1251 1252 if (EnableAddressRebalancing) { 1253 rebalanceAddressTrees(); 1254 1255 DEBUG_WITH_TYPE("isel", { 1256 dbgs() << "Address tree balanced selection DAG:"; 1257 CurDAG->dump(); 1258 }); 1259 } 1260 } 1261 1262 void HexagonDAGToDAGISel::EmitFunctionEntryCode() { 1263 auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget()); 1264 auto &HFI = *HST.getFrameLowering(); 1265 if (!HFI.needsAligna(*MF)) 1266 return; 1267 1268 MachineFrameInfo &MFI = MF->getFrameInfo(); 1269 MachineBasicBlock *EntryBB = &MF->front(); 1270 unsigned AR = FuncInfo->CreateReg(MVT::i32); 1271 unsigned MaxA = MFI.getMaxAlignment(); 1272 BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR) 1273 .addImm(MaxA); 1274 MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR); 1275 } 1276 1277 // Match a frame index that can be used in an addressing mode. 1278 bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) { 1279 if (N.getOpcode() != ISD::FrameIndex) 1280 return false; 1281 auto &HFI = *HST->getFrameLowering(); 1282 MachineFrameInfo &MFI = MF->getFrameInfo(); 1283 int FX = cast<FrameIndexSDNode>(N)->getIndex(); 1284 if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF)) 1285 return false; 1286 R = CurDAG->getTargetFrameIndex(FX, MVT::i32); 1287 return true; 1288 } 1289 1290 inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { 1291 return SelectGlobalAddress(N, R, false, 0); 1292 } 1293 1294 inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { 1295 return SelectGlobalAddress(N, R, true, 0); 1296 } 1297 1298 inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) { 1299 return SelectAnyImmediate(N, R, 0); 1300 } 1301 1302 inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) { 1303 return SelectAnyImmediate(N, R, 0); 1304 } 1305 inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) { 1306 return SelectAnyImmediate(N, R, 1); 1307 } 1308 inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) { 1309 return SelectAnyImmediate(N, R, 2); 1310 } 1311 inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) { 1312 return SelectAnyImmediate(N, R, 3); 1313 } 1314 1315 inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) { 1316 EVT T = N.getValueType(); 1317 if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N)) 1318 return false; 1319 R = N; 1320 return true; 1321 } 1322 1323 bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R, 1324 uint32_t LogAlign) { 1325 auto IsAligned = [LogAlign] (uint64_t V) -> bool { 1326 return alignTo(V, (uint64_t)1 << LogAlign) == V; 1327 }; 1328 1329 switch (N.getOpcode()) { 1330 case ISD::Constant: { 1331 if (N.getValueType() != MVT::i32) 1332 return false; 1333 int32_t V = cast<const ConstantSDNode>(N)->getZExtValue(); 1334 if (!IsAligned(V)) 1335 return false; 1336 R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType()); 1337 return true; 1338 } 1339 case HexagonISD::JT: 1340 case HexagonISD::CP: 1341 // These are assumed to always be aligned at least 8-byte boundary. 1342 if (LogAlign > 3) 1343 return false; 1344 R = N.getOperand(0); 1345 return true; 1346 case ISD::ExternalSymbol: 1347 // Symbols may be aligned at any boundary. 1348 if (LogAlign > 0) 1349 return false; 1350 R = N; 1351 return true; 1352 case ISD::BlockAddress: 1353 // Block address is always aligned at least 4-byte boundary. 1354 if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset())) 1355 return false; 1356 R = N; 1357 return true; 1358 } 1359 1360 if (SelectGlobalAddress(N, R, false, LogAlign) || 1361 SelectGlobalAddress(N, R, true, LogAlign)) 1362 return true; 1363 1364 return false; 1365 } 1366 1367 bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, 1368 bool UseGP, uint32_t LogAlign) { 1369 auto IsAligned = [LogAlign] (uint64_t V) -> bool { 1370 return alignTo(V, (uint64_t)1 << LogAlign) == V; 1371 }; 1372 1373 switch (N.getOpcode()) { 1374 case ISD::ADD: { 1375 SDValue N0 = N.getOperand(0); 1376 SDValue N1 = N.getOperand(1); 1377 unsigned GAOpc = N0.getOpcode(); 1378 if (UseGP && GAOpc != HexagonISD::CONST32_GP) 1379 return false; 1380 if (!UseGP && GAOpc != HexagonISD::CONST32) 1381 return false; 1382 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) { 1383 SDValue Addr = N0.getOperand(0); 1384 // For the purpose of alignment, sextvalue and zextvalue are the same. 1385 if (!IsAligned(Const->getZExtValue())) 1386 return false; 1387 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) { 1388 if (GA->getOpcode() == ISD::TargetGlobalAddress) { 1389 uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); 1390 R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const), 1391 N.getValueType(), NewOff); 1392 return true; 1393 } 1394 } 1395 } 1396 break; 1397 } 1398 case HexagonISD::CP: 1399 case HexagonISD::JT: 1400 case HexagonISD::CONST32: 1401 // The operand(0) of CONST32 is TargetGlobalAddress, which is what we 1402 // want in the instruction. 1403 if (!UseGP) 1404 R = N.getOperand(0); 1405 return !UseGP; 1406 case HexagonISD::CONST32_GP: 1407 if (UseGP) 1408 R = N.getOperand(0); 1409 return UseGP; 1410 default: 1411 return false; 1412 } 1413 1414 return false; 1415 } 1416 1417 bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) { 1418 // This (complex pattern) function is meant to detect a sign-extension 1419 // i32->i64 on a per-operand basis. This would allow writing single 1420 // patterns that would cover a number of combinations of different ways 1421 // a sign-extensions could be written. For example: 1422 // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y) 1423 // could match either one of these: 1424 // (mul (sext x) (sext_inreg y)) 1425 // (mul (sext-load *p) (sext_inreg y)) 1426 // (mul (sext_inreg x) (sext y)) 1427 // etc. 1428 // 1429 // The returned value will have type i64 and its low word will 1430 // contain the value being extended. The high bits are not specified. 1431 // The returned type is i64 because the original type of N was i64, 1432 // but the users of this function should only use the low-word of the 1433 // result, e.g. 1434 // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y)) 1435 1436 if (N.getValueType() != MVT::i64) 1437 return false; 1438 unsigned Opc = N.getOpcode(); 1439 switch (Opc) { 1440 case ISD::SIGN_EXTEND: 1441 case ISD::SIGN_EXTEND_INREG: { 1442 // sext_inreg has the source type as a separate operand. 1443 EVT T = Opc == ISD::SIGN_EXTEND 1444 ? N.getOperand(0).getValueType() 1445 : cast<VTSDNode>(N.getOperand(1))->getVT(); 1446 unsigned SW = T.getSizeInBits(); 1447 if (SW == 32) 1448 R = N.getOperand(0); 1449 else if (SW < 32) 1450 R = N; 1451 else 1452 return false; 1453 break; 1454 } 1455 case ISD::LOAD: { 1456 LoadSDNode *L = cast<LoadSDNode>(N); 1457 if (L->getExtensionType() != ISD::SEXTLOAD) 1458 return false; 1459 // All extending loads extend to i32, so even if the value in 1460 // memory is shorter than 32 bits, it will be i32 after the load. 1461 if (L->getMemoryVT().getSizeInBits() > 32) 1462 return false; 1463 R = N; 1464 break; 1465 } 1466 case ISD::SRA: { 1467 auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1468 if (!S || S->getZExtValue() != 32) 1469 return false; 1470 R = N; 1471 break; 1472 } 1473 default: 1474 return false; 1475 } 1476 EVT RT = R.getValueType(); 1477 if (RT == MVT::i64) 1478 return true; 1479 assert(RT == MVT::i32); 1480 // This is only to produce a value of type i64. Do not rely on the 1481 // high bits produced by this. 1482 const SDLoc &dl(N); 1483 SDValue Ops[] = { 1484 CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32), 1485 R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32), 1486 R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32) 1487 }; 1488 SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, 1489 MVT::i64, Ops); 1490 R = SDValue(T, 0); 1491 return true; 1492 } 1493 1494 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, 1495 SDValue &Src) { 1496 unsigned Opc = Val.getOpcode(); 1497 switch (Opc) { 1498 case ISD::SIGN_EXTEND: 1499 case ISD::ZERO_EXTEND: 1500 case ISD::ANY_EXTEND: { 1501 const SDValue &Op0 = Val.getOperand(0); 1502 EVT T = Op0.getValueType(); 1503 if (T.isInteger() && T.getSizeInBits() == NumBits) { 1504 Src = Op0; 1505 return true; 1506 } 1507 break; 1508 } 1509 case ISD::SIGN_EXTEND_INREG: 1510 case ISD::AssertSext: 1511 case ISD::AssertZext: 1512 if (Val.getOperand(0).getValueType().isInteger()) { 1513 VTSDNode *T = cast<VTSDNode>(Val.getOperand(1)); 1514 if (T->getVT().getSizeInBits() == NumBits) { 1515 Src = Val.getOperand(0); 1516 return true; 1517 } 1518 } 1519 break; 1520 case ISD::AND: { 1521 // Check if this is an AND with NumBits of lower bits set to 1. 1522 uint64_t Mask = (1 << NumBits) - 1; 1523 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { 1524 if (C->getZExtValue() == Mask) { 1525 Src = Val.getOperand(1); 1526 return true; 1527 } 1528 } 1529 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { 1530 if (C->getZExtValue() == Mask) { 1531 Src = Val.getOperand(0); 1532 return true; 1533 } 1534 } 1535 break; 1536 } 1537 case ISD::OR: 1538 case ISD::XOR: { 1539 // OR/XOR with the lower NumBits bits set to 0. 1540 uint64_t Mask = (1 << NumBits) - 1; 1541 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { 1542 if ((C->getZExtValue() & Mask) == 0) { 1543 Src = Val.getOperand(1); 1544 return true; 1545 } 1546 } 1547 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { 1548 if ((C->getZExtValue() & Mask) == 0) { 1549 Src = Val.getOperand(0); 1550 return true; 1551 } 1552 } 1553 break; 1554 } 1555 default: 1556 break; 1557 } 1558 return false; 1559 } 1560 1561 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { 1562 return N->getAlignment() >= N->getMemoryVT().getStoreSize(); 1563 } 1564 1565 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const { 1566 unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF); 1567 switch (N->getMemoryVT().getStoreSize()) { 1568 case 1: 1569 return StackSize <= 56; // 1*2^6 - 8 1570 case 2: 1571 return StackSize <= 120; // 2*2^6 - 8 1572 case 4: 1573 return StackSize <= 248; // 4*2^6 - 8 1574 default: 1575 return false; 1576 } 1577 } 1578 1579 // Return true when the given node fits in a positive half word. 1580 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { 1581 if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) { 1582 int64_t V = CN->getSExtValue(); 1583 return V > 0 && isInt<16>(V); 1584 } 1585 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 1586 const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1)); 1587 return VN->getVT().getSizeInBits() <= 16; 1588 } 1589 return false; 1590 } 1591 1592 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const { 1593 return !CheckSingleUse || N->hasOneUse(); 1594 } 1595 1596 //////////////////////////////////////////////////////////////////////////////// 1597 // Rebalancing of address calculation trees 1598 1599 static bool isOpcodeHandled(const SDNode *N) { 1600 switch (N->getOpcode()) { 1601 case ISD::ADD: 1602 case ISD::MUL: 1603 return true; 1604 case ISD::SHL: 1605 // We only handle constant shifts because these can be easily flattened 1606 // into multiplications by 2^Op1. 1607 return isa<ConstantSDNode>(N->getOperand(1).getNode()); 1608 default: 1609 return false; 1610 } 1611 } 1612 1613 /// Return the weight of an SDNode 1614 int HexagonDAGToDAGISel::getWeight(SDNode *N) { 1615 if (!isOpcodeHandled(N)) 1616 return 1; 1617 assert(RootWeights.count(N) && "Cannot get weight of unseen root!"); 1618 assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!"); 1619 assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!"); 1620 return RootWeights[N]; 1621 } 1622 1623 int HexagonDAGToDAGISel::getHeight(SDNode *N) { 1624 if (!isOpcodeHandled(N)) 1625 return 0; 1626 assert(RootWeights.count(N) && RootWeights[N] >= 0 && 1627 "Cannot query height of unvisited/RAUW'd node!"); 1628 return RootHeights[N]; 1629 } 1630 1631 namespace { 1632 struct WeightedLeaf { 1633 SDValue Value; 1634 int Weight; 1635 int InsertionOrder; 1636 1637 WeightedLeaf() : Value(SDValue()) { } 1638 1639 WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) : 1640 Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) { 1641 assert(Weight >= 0 && "Weight must be >= 0"); 1642 } 1643 1644 static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) { 1645 assert(A.Value.getNode() && B.Value.getNode()); 1646 return A.Weight == B.Weight ? 1647 (A.InsertionOrder > B.InsertionOrder) : 1648 (A.Weight > B.Weight); 1649 } 1650 }; 1651 1652 /// A specialized priority queue for WeigthedLeaves. It automatically folds 1653 /// constants and allows removal of non-top elements while maintaining the 1654 /// priority order. 1655 class LeafPrioQueue { 1656 SmallVector<WeightedLeaf, 8> Q; 1657 bool HaveConst; 1658 WeightedLeaf ConstElt; 1659 unsigned Opcode; 1660 1661 public: 1662 bool empty() { 1663 return (!HaveConst && Q.empty()); 1664 } 1665 1666 size_t size() { 1667 return Q.size() + HaveConst; 1668 } 1669 1670 bool hasConst() { 1671 return HaveConst; 1672 } 1673 1674 const WeightedLeaf &top() { 1675 if (HaveConst) 1676 return ConstElt; 1677 return Q.front(); 1678 } 1679 1680 WeightedLeaf pop() { 1681 if (HaveConst) { 1682 HaveConst = false; 1683 return ConstElt; 1684 } 1685 std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); 1686 return Q.pop_back_val(); 1687 } 1688 1689 void push(WeightedLeaf L, bool SeparateConst=true) { 1690 if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) { 1691 if (Opcode == ISD::MUL && 1692 cast<ConstantSDNode>(L.Value)->getSExtValue() == 1) 1693 return; 1694 if (Opcode == ISD::ADD && 1695 cast<ConstantSDNode>(L.Value)->getSExtValue() == 0) 1696 return; 1697 1698 HaveConst = true; 1699 ConstElt = L; 1700 } else { 1701 Q.push_back(L); 1702 std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); 1703 } 1704 } 1705 1706 /// Push L to the bottom of the queue regardless of its weight. If L is 1707 /// constant, it will not be folded with other constants in the queue. 1708 void pushToBottom(WeightedLeaf L) { 1709 L.Weight = 1000; 1710 push(L, false); 1711 } 1712 1713 /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of 1714 /// lowest weight and remove it from the queue. 1715 WeightedLeaf findSHL(uint64_t MaxAmount); 1716 1717 WeightedLeaf findMULbyConst(); 1718 1719 LeafPrioQueue(unsigned Opcode) : 1720 HaveConst(false), Opcode(Opcode) { } 1721 }; 1722 } // end anonymous namespace 1723 1724 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) { 1725 int ResultPos; 1726 WeightedLeaf Result; 1727 1728 for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) { 1729 const WeightedLeaf &L = Q[Pos]; 1730 const SDValue &Val = L.Value; 1731 if (Val.getOpcode() != ISD::SHL || 1732 !isa<ConstantSDNode>(Val.getOperand(1)) || 1733 Val.getConstantOperandVal(1) > MaxAmount) 1734 continue; 1735 if (!Result.Value.getNode() || Result.Weight > L.Weight || 1736 (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder)) 1737 { 1738 Result = L; 1739 ResultPos = Pos; 1740 } 1741 } 1742 1743 if (Result.Value.getNode()) { 1744 Q.erase(&Q[ResultPos]); 1745 std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); 1746 } 1747 1748 return Result; 1749 } 1750 1751 WeightedLeaf LeafPrioQueue::findMULbyConst() { 1752 int ResultPos; 1753 WeightedLeaf Result; 1754 1755 for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) { 1756 const WeightedLeaf &L = Q[Pos]; 1757 const SDValue &Val = L.Value; 1758 if (Val.getOpcode() != ISD::MUL || 1759 !isa<ConstantSDNode>(Val.getOperand(1)) || 1760 Val.getConstantOperandVal(1) > 127) 1761 continue; 1762 if (!Result.Value.getNode() || Result.Weight > L.Weight || 1763 (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder)) 1764 { 1765 Result = L; 1766 ResultPos = Pos; 1767 } 1768 } 1769 1770 if (Result.Value.getNode()) { 1771 Q.erase(&Q[ResultPos]); 1772 std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare); 1773 } 1774 1775 return Result; 1776 } 1777 1778 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) { 1779 uint64_t MulFactor = 1ull << N->getConstantOperandVal(1); 1780 return CurDAG->getConstant(MulFactor, SDLoc(N), 1781 N->getOperand(1).getValueType()); 1782 } 1783 1784 /// @returns the value x for which 2^x is a factor of Val 1785 static unsigned getPowerOf2Factor(SDValue Val) { 1786 if (Val.getOpcode() == ISD::MUL) { 1787 unsigned MaxFactor = 0; 1788 for (int i = 0; i < 2; ++i) { 1789 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i)); 1790 if (!C) 1791 continue; 1792 const APInt &CInt = C->getAPIntValue(); 1793 if (CInt.getBoolValue()) 1794 MaxFactor = CInt.countTrailingZeros(); 1795 } 1796 return MaxFactor; 1797 } 1798 if (Val.getOpcode() == ISD::SHL) { 1799 if (!isa<ConstantSDNode>(Val.getOperand(1).getNode())) 1800 return 0; 1801 return (unsigned) Val.getConstantOperandVal(1); 1802 } 1803 1804 return 0; 1805 } 1806 1807 /// @returns true if V>>Amount will eliminate V's operation on its child 1808 static bool willShiftRightEliminate(SDValue V, unsigned Amount) { 1809 if (V.getOpcode() == ISD::MUL) { 1810 SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; 1811 for (int i = 0; i < 2; ++i) 1812 if (isa<ConstantSDNode>(Ops[i].getNode()) && 1813 V.getConstantOperandVal(i) % (1ULL << Amount) == 0) { 1814 uint64_t NewConst = V.getConstantOperandVal(i) >> Amount; 1815 return (NewConst == 1); 1816 } 1817 } else if (V.getOpcode() == ISD::SHL) { 1818 return (Amount == V.getConstantOperandVal(1)); 1819 } 1820 1821 return false; 1822 } 1823 1824 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) { 1825 SDValue Ops[] = { V.getOperand(0), V.getOperand(1) }; 1826 if (V.getOpcode() == ISD::MUL) { 1827 for (int i=0; i < 2; ++i) { 1828 if (isa<ConstantSDNode>(Ops[i].getNode()) && 1829 V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) { 1830 uint64_t NewConst = V.getConstantOperandVal(i) >> Power; 1831 if (NewConst == 1) 1832 return Ops[!i]; 1833 Ops[i] = CurDAG->getConstant(NewConst, 1834 SDLoc(V), V.getValueType()); 1835 break; 1836 } 1837 } 1838 } else if (V.getOpcode() == ISD::SHL) { 1839 uint64_t ShiftAmount = V.getConstantOperandVal(1); 1840 if (ShiftAmount == Power) 1841 return Ops[0]; 1842 Ops[1] = CurDAG->getConstant(ShiftAmount - Power, 1843 SDLoc(V), V.getValueType()); 1844 } 1845 1846 return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops); 1847 } 1848 1849 static bool isTargetConstant(const SDValue &V) { 1850 return V.getOpcode() == HexagonISD::CONST32 || 1851 V.getOpcode() == HexagonISD::CONST32_GP; 1852 } 1853 1854 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) { 1855 if (GAUsesInFunction.count(V)) 1856 return GAUsesInFunction[V]; 1857 1858 unsigned Result = 0; 1859 const Function &CurF = CurDAG->getMachineFunction().getFunction(); 1860 for (const User *U : V->users()) { 1861 if (isa<Instruction>(U) && 1862 cast<Instruction>(U)->getParent()->getParent() == &CurF) 1863 ++Result; 1864 } 1865 1866 GAUsesInFunction[V] = Result; 1867 1868 return Result; 1869 } 1870 1871 /// Note - After calling this, N may be dead. It may have been replaced by a 1872 /// new node, so always use the returned value in place of N. 1873 /// 1874 /// @returns The SDValue taking the place of N (which could be N if it is 1875 /// unchanged) 1876 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { 1877 assert(RootWeights.count(N) && "Cannot balance non-root node."); 1878 assert(RootWeights[N] != -2 && "This node was RAUW'd!"); 1879 assert(!TopLevel || N->getOpcode() == ISD::ADD); 1880 1881 // Return early if this node was already visited 1882 if (RootWeights[N] != -1) 1883 return SDValue(N, 0); 1884 1885 assert(isOpcodeHandled(N)); 1886 1887 SDValue Op0 = N->getOperand(0); 1888 SDValue Op1 = N->getOperand(1); 1889 1890 // Return early if the operands will remain unchanged or are all roots 1891 if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) && 1892 (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) { 1893 SDNode *Op0N = Op0.getNode(); 1894 int Weight; 1895 if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) { 1896 Weight = getWeight(balanceSubTree(Op0N).getNode()); 1897 // Weight = calculateWeight(Op0N); 1898 } else 1899 Weight = getWeight(Op0N); 1900 1901 SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd 1902 if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) { 1903 Weight += getWeight(balanceSubTree(Op1N).getNode()); 1904 // Weight += calculateWeight(Op1N); 1905 } else 1906 Weight += getWeight(Op1N); 1907 1908 RootWeights[N] = Weight; 1909 RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()), 1910 getHeight(N->getOperand(1).getNode())) + 1; 1911 1912 LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight 1913 << " Height=" << RootHeights[N] << "): "); 1914 LLVM_DEBUG(N->dump(CurDAG)); 1915 1916 return SDValue(N, 0); 1917 } 1918 1919 LLVM_DEBUG(dbgs() << "** Balancing root node: "); 1920 LLVM_DEBUG(N->dump(CurDAG)); 1921 1922 unsigned NOpcode = N->getOpcode(); 1923 1924 LeafPrioQueue Leaves(NOpcode); 1925 SmallVector<SDValue, 4> Worklist; 1926 Worklist.push_back(SDValue(N, 0)); 1927 1928 // SHL nodes will be converted to MUL nodes 1929 if (NOpcode == ISD::SHL) 1930 NOpcode = ISD::MUL; 1931 1932 bool CanFactorize = false; 1933 WeightedLeaf Mul1, Mul2; 1934 unsigned MaxPowerOf2 = 0; 1935 WeightedLeaf GA; 1936 1937 // Do not try to factor out a shift if there is already a shift at the tip of 1938 // the tree. 1939 bool HaveTopLevelShift = false; 1940 if (TopLevel && 1941 ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL && 1942 Op0.getConstantOperandVal(1) < 4) || 1943 (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL && 1944 Op1.getConstantOperandVal(1) < 4))) 1945 HaveTopLevelShift = true; 1946 1947 // Flatten the subtree into an ordered list of leaves; at the same time 1948 // determine whether the tree is already balanced. 1949 int InsertionOrder = 0; 1950 SmallDenseMap<SDValue, int> NodeHeights; 1951 bool Imbalanced = false; 1952 int CurrentWeight = 0; 1953 while (!Worklist.empty()) { 1954 SDValue Child = Worklist.pop_back_val(); 1955 1956 if (Child.getNode() != N && RootWeights.count(Child.getNode())) { 1957 // CASE 1: Child is a root note 1958 1959 int Weight = RootWeights[Child.getNode()]; 1960 if (Weight == -1) { 1961 Child = balanceSubTree(Child.getNode()); 1962 // calculateWeight(Child.getNode()); 1963 Weight = getWeight(Child.getNode()); 1964 } else if (Weight == -2) { 1965 // Whoops, this node was RAUWd by one of the balanceSubTree calls we 1966 // made. Our worklist isn't up to date anymore. 1967 // Restart the whole process. 1968 LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); 1969 return balanceSubTree(N, TopLevel); 1970 } 1971 1972 NodeHeights[Child] = 1; 1973 CurrentWeight += Weight; 1974 1975 unsigned PowerOf2; 1976 if (TopLevel && !CanFactorize && !HaveTopLevelShift && 1977 (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) && 1978 Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) { 1979 // Try to identify two factorizable MUL/SHL children greedily. Leave 1980 // them out of the priority queue for now so we can deal with them 1981 // after. 1982 if (!Mul1.Value.getNode()) { 1983 Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++); 1984 MaxPowerOf2 = PowerOf2; 1985 } else { 1986 Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++); 1987 MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2); 1988 1989 // Our addressing modes can only shift by a maximum of 3 1990 if (MaxPowerOf2 > 3) 1991 MaxPowerOf2 = 3; 1992 1993 CanFactorize = true; 1994 } 1995 } else 1996 Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); 1997 } else if (!isOpcodeHandled(Child.getNode())) { 1998 // CASE 2: Child is an unhandled kind of node (e.g. constant) 1999 int Weight = getWeight(Child.getNode()); 2000 2001 NodeHeights[Child] = getHeight(Child.getNode()); 2002 CurrentWeight += Weight; 2003 2004 if (isTargetConstant(Child) && !GA.Value.getNode()) 2005 GA = WeightedLeaf(Child, Weight, InsertionOrder++); 2006 else 2007 Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++)); 2008 } else { 2009 // CASE 3: Child is a subtree of same opcode 2010 // Visit children first, then flatten. 2011 unsigned ChildOpcode = Child.getOpcode(); 2012 assert(ChildOpcode == NOpcode || 2013 (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL)); 2014 2015 // Convert SHL to MUL 2016 SDValue Op1; 2017 if (ChildOpcode == ISD::SHL) 2018 Op1 = getMultiplierForSHL(Child.getNode()); 2019 else 2020 Op1 = Child->getOperand(1); 2021 2022 if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) { 2023 assert(!NodeHeights.count(Child) && "Parent visited before children?"); 2024 // Visit children first, then re-visit this node 2025 Worklist.push_back(Child); 2026 Worklist.push_back(Op1); 2027 Worklist.push_back(Child->getOperand(0)); 2028 } else { 2029 // Back at this node after visiting the children 2030 if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1) 2031 Imbalanced = true; 2032 2033 NodeHeights[Child] = std::max(NodeHeights[Op1], 2034 NodeHeights[Child->getOperand(0)]) + 1; 2035 } 2036 } 2037 } 2038 2039 LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)] 2040 << " weight=" << CurrentWeight 2041 << " imbalanced=" << Imbalanced << "\n"); 2042 2043 // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y) 2044 // This factors out a shift in order to match memw(a<<Y+b). 2045 if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) || 2046 willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) { 2047 LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n"); 2048 int Weight = Mul1.Weight + Mul2.Weight; 2049 int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1; 2050 SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2); 2051 SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2); 2052 SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(), 2053 Mul1Factored, Mul2Factored); 2054 SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N), 2055 Mul1.Value.getValueType()); 2056 SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(), 2057 Sum, Const); 2058 NodeHeights[New] = Height; 2059 Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder)); 2060 } else if (Mul1.Value.getNode()) { 2061 // We failed to factorize two MULs, so now the Muls are left outside the 2062 // queue... add them back. 2063 Leaves.push(Mul1); 2064 if (Mul2.Value.getNode()) 2065 Leaves.push(Mul2); 2066 CanFactorize = false; 2067 } 2068 2069 // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere 2070 // and the root node itself is not used more than twice. This reduces the 2071 // amount of additional constant extenders introduced by this optimization. 2072 bool CombinedGA = false; 2073 if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() && 2074 GA.Value.hasOneUse() && N->use_size() < 3) { 2075 GlobalAddressSDNode *GANode = 2076 cast<GlobalAddressSDNode>(GA.Value.getOperand(0)); 2077 ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value); 2078 2079 if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() && 2080 getTargetLowering()->isOffsetFoldingLegal(GANode)) { 2081 LLVM_DEBUG(dbgs() << "--> Combining GA and offset (" 2082 << Offset->getSExtValue() << "): "); 2083 LLVM_DEBUG(GANode->dump(CurDAG)); 2084 2085 SDValue NewTGA = 2086 CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value), 2087 GANode->getValueType(0), 2088 GANode->getOffset() + (uint64_t)Offset->getSExtValue()); 2089 GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value), 2090 GA.Value.getValueType(), NewTGA); 2091 GA.Weight += Leaves.top().Weight; 2092 2093 NodeHeights[GA.Value] = getHeight(GA.Value.getNode()); 2094 CombinedGA = true; 2095 2096 Leaves.pop(); // Remove the offset constant from the queue 2097 } 2098 } 2099 2100 if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) || 2101 (RebalanceOnlyImbalancedTrees && !Imbalanced)) { 2102 RootWeights[N] = CurrentWeight; 2103 RootHeights[N] = NodeHeights[SDValue(N, 0)]; 2104 2105 return SDValue(N, 0); 2106 } 2107 2108 // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5)) 2109 if (NOpcode == ISD::ADD && GA.Value.getNode()) { 2110 WeightedLeaf SHL = Leaves.findSHL(31); 2111 if (SHL.Value.getNode()) { 2112 int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1; 2113 GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value), 2114 GA.Value.getValueType(), 2115 GA.Value, SHL.Value); 2116 GA.Weight = SHL.Weight; // Specifically ignore the GA weight here 2117 NodeHeights[GA.Value] = Height; 2118 } 2119 } 2120 2121 if (GA.Value.getNode()) 2122 Leaves.push(GA); 2123 2124 // If this is the top level and we haven't factored out a shift, we should try 2125 // to move a constant to the bottom to match addressing modes like memw(rX+C) 2126 if (TopLevel && !CanFactorize && Leaves.hasConst()) { 2127 LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree."); 2128 Leaves.pushToBottom(Leaves.pop()); 2129 } 2130 2131 const DataLayout &DL = CurDAG->getDataLayout(); 2132 const TargetLowering &TLI = *getTargetLowering(); 2133 2134 // Rebuild the tree using Huffman's algorithm 2135 while (Leaves.size() > 1) { 2136 WeightedLeaf L0 = Leaves.pop(); 2137 2138 // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)), 2139 // otherwise just get the next leaf 2140 WeightedLeaf L1 = Leaves.findMULbyConst(); 2141 if (!L1.Value.getNode()) 2142 L1 = Leaves.pop(); 2143 2144 assert(L0.Weight <= L1.Weight && "Priority queue is broken!"); 2145 2146 SDValue V0 = L0.Value; 2147 int V0Weight = L0.Weight; 2148 SDValue V1 = L1.Value; 2149 int V1Weight = L1.Weight; 2150 2151 // Make sure that none of these nodes have been RAUW'd 2152 if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) || 2153 (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) { 2154 LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); 2155 return balanceSubTree(N, TopLevel); 2156 } 2157 2158 ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0); 2159 ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1); 2160 EVT VT = N->getValueType(0); 2161 SDValue NewNode; 2162 2163 if (V0C && !V1C) { 2164 std::swap(V0, V1); 2165 std::swap(V0C, V1C); 2166 } 2167 2168 // Calculate height of this node 2169 assert(NodeHeights.count(V0) && NodeHeights.count(V1) && 2170 "Children must have been visited before re-combining them!"); 2171 int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1; 2172 2173 // Rebuild this node (and restore SHL from MUL if needed) 2174 if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2()) 2175 NewNode = CurDAG->getNode( 2176 ISD::SHL, SDLoc(V0), VT, V0, 2177 CurDAG->getConstant( 2178 V1C->getAPIntValue().logBase2(), SDLoc(N), 2179 TLI.getScalarShiftAmountTy(DL, V0.getValueType()))); 2180 else 2181 NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1); 2182 2183 NodeHeights[NewNode] = Height; 2184 2185 int Weight = V0Weight + V1Weight; 2186 Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder)); 2187 2188 LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight 2189 << ",Height=" << Height << "):\n"); 2190 LLVM_DEBUG(NewNode.dump()); 2191 } 2192 2193 assert(Leaves.size() == 1); 2194 SDValue NewRoot = Leaves.top().Value; 2195 2196 assert(NodeHeights.count(NewRoot)); 2197 int Height = NodeHeights[NewRoot]; 2198 2199 // Restore SHL if we earlier converted it to a MUL 2200 if (NewRoot.getOpcode() == ISD::MUL) { 2201 ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1)); 2202 if (V1C && V1C->getAPIntValue().isPowerOf2()) { 2203 EVT VT = NewRoot.getValueType(); 2204 SDValue V0 = NewRoot.getOperand(0); 2205 NewRoot = CurDAG->getNode( 2206 ISD::SHL, SDLoc(NewRoot), VT, V0, 2207 CurDAG->getConstant( 2208 V1C->getAPIntValue().logBase2(), SDLoc(NewRoot), 2209 TLI.getScalarShiftAmountTy(DL, V0.getValueType()))); 2210 } 2211 } 2212 2213 if (N != NewRoot.getNode()) { 2214 LLVM_DEBUG(dbgs() << "--> Root is now: "); 2215 LLVM_DEBUG(NewRoot.dump()); 2216 2217 // Replace all uses of old root by new root 2218 CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode()); 2219 // Mark that we have RAUW'd N 2220 RootWeights[N] = -2; 2221 } else { 2222 LLVM_DEBUG(dbgs() << "--> Root unchanged.\n"); 2223 } 2224 2225 RootWeights[NewRoot.getNode()] = Leaves.top().Weight; 2226 RootHeights[NewRoot.getNode()] = Height; 2227 2228 return NewRoot; 2229 } 2230 2231 void HexagonDAGToDAGISel::rebalanceAddressTrees() { 2232 for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) { 2233 SDNode *N = &*I++; 2234 if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE) 2235 continue; 2236 2237 SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr(); 2238 if (BasePtr.getOpcode() != ISD::ADD) 2239 continue; 2240 2241 // We've already processed this node 2242 if (RootWeights.count(BasePtr.getNode())) 2243 continue; 2244 2245 LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: "); 2246 LLVM_DEBUG(N->dump(CurDAG)); 2247 2248 // FindRoots 2249 SmallVector<SDNode *, 4> Worklist; 2250 2251 Worklist.push_back(BasePtr.getOperand(0).getNode()); 2252 Worklist.push_back(BasePtr.getOperand(1).getNode()); 2253 2254 while (!Worklist.empty()) { 2255 SDNode *N = Worklist.pop_back_val(); 2256 unsigned Opcode = N->getOpcode(); 2257 2258 if (!isOpcodeHandled(N)) 2259 continue; 2260 2261 Worklist.push_back(N->getOperand(0).getNode()); 2262 Worklist.push_back(N->getOperand(1).getNode()); 2263 2264 // Not a root if it has only one use and same opcode as its parent 2265 if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode()) 2266 continue; 2267 2268 // This root node has already been processed 2269 if (RootWeights.count(N)) 2270 continue; 2271 2272 RootWeights[N] = -1; 2273 } 2274 2275 // Balance node itself 2276 RootWeights[BasePtr.getNode()] = -1; 2277 SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true); 2278 2279 if (N->getOpcode() == ISD::LOAD) 2280 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), 2281 NewBasePtr, N->getOperand(2)); 2282 else 2283 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), 2284 NewBasePtr, N->getOperand(3)); 2285 2286 LLVM_DEBUG(dbgs() << "--> Final node: "); 2287 LLVM_DEBUG(N->dump(CurDAG)); 2288 } 2289 2290 CurDAG->RemoveDeadNodes(); 2291 GAUsesInFunction.clear(); 2292 RootHeights.clear(); 2293 RootWeights.clear(); 2294 } 2295