1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines an instruction selector for the AArch64 target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64TargetMachine.h" 15 #include "MCTargetDesc/AArch64AddressingModes.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/CodeGen/SelectionDAGISel.h" 18 #include "llvm/IR/Function.h" // To access function attributes. 19 #include "llvm/IR/GlobalValue.h" 20 #include "llvm/IR/Intrinsics.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "aarch64-isel" 29 30 //===--------------------------------------------------------------------===// 31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 32 /// instructions for SelectionDAG operations. 33 /// 34 namespace { 35 36 class AArch64DAGToDAGISel : public SelectionDAGISel { 37 AArch64TargetMachine &TM; 38 39 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 40 /// make the right decision when generating code for different targets. 41 const AArch64Subtarget *Subtarget; 42 43 bool ForCodeSize; 44 45 public: 46 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 47 CodeGenOpt::Level OptLevel) 48 : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr), 49 ForCodeSize(false) {} 50 51 const char *getPassName() const override { 52 return "AArch64 Instruction Selection"; 53 } 54 55 bool runOnMachineFunction(MachineFunction &MF) override { 56 ForCodeSize = 57 MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || 58 MF.getFunction()->hasFnAttribute(Attribute::MinSize); 59 Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 60 return SelectionDAGISel::runOnMachineFunction(MF); 61 } 62 63 SDNode *Select(SDNode *Node) override; 64 65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 66 /// inline asm expressions. 
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 68 unsigned ConstraintID, 69 std::vector<SDValue> &OutOps) override; 70 71 SDNode *SelectMLAV64LaneV128(SDNode *N); 72 SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N); 73 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 74 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 75 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 76 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 77 return SelectShiftedRegister(N, false, Reg, Shift); 78 } 79 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 80 return SelectShiftedRegister(N, true, Reg, Shift); 81 } 82 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 83 return SelectAddrModeIndexed(N, 1, Base, OffImm); 84 } 85 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 86 return SelectAddrModeIndexed(N, 2, Base, OffImm); 87 } 88 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 89 return SelectAddrModeIndexed(N, 4, Base, OffImm); 90 } 91 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 92 return SelectAddrModeIndexed(N, 8, Base, OffImm); 93 } 94 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 95 return SelectAddrModeIndexed(N, 16, Base, OffImm); 96 } 97 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 98 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 99 } 100 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 101 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 102 } 103 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 104 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 105 } 106 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 107 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 108 } 109 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 110 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 111 } 112 113 template<int Width> 114 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 115 SDValue &SignExtend, SDValue &DoShift) { 116 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 117 } 118 119 template<int Width> 120 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 121 SDValue &SignExtend, SDValue &DoShift) { 122 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 123 } 124 125 126 /// Form sequences of consecutive 64/128-bit registers for use in NEON 127 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 128 /// between 1 and 4 elements. If it contains a single element that is returned 129 /// unchanged; otherwise a REG_SEQUENCE value is returned. 130 SDValue createDTuple(ArrayRef<SDValue> Vecs); 131 SDValue createQTuple(ArrayRef<SDValue> Vecs); 132 133 /// Generic helper for the createDTuple/createQTuple 134 /// functions. Those should almost always be called instead. 
135 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[], 136 const unsigned SubRegs[]); 137 138 SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 139 140 SDNode *SelectIndexedLoad(SDNode *N, bool &Done); 141 142 SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 143 unsigned SubRegIdx); 144 SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 145 unsigned SubRegIdx); 146 SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 147 SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 148 149 SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 150 SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 151 SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 152 SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 153 154 SDNode *SelectBitfieldExtractOp(SDNode *N); 155 SDNode *SelectBitfieldInsertOp(SDNode *N); 156 157 SDNode *SelectLIBM(SDNode *N); 158 159 // Include the pieces autogenerated from the target description. 160 #include "AArch64GenDAGISel.inc" 161 162 private: 163 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 164 SDValue &Shift); 165 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 166 SDValue &OffImm); 167 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 168 SDValue &OffImm); 169 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 170 SDValue &Offset, SDValue &SignExtend, 171 SDValue &DoShift); 172 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 173 SDValue &Offset, SDValue &SignExtend, 174 SDValue &DoShift); 175 bool isWorthFolding(SDValue V) const; 176 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 177 SDValue &Offset, SDValue &SignExtend); 178 179 template<unsigned RegWidth> 180 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 181 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 182 } 183 184 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 185 }; 186 } // end anonymous namespace 187 188 /// isIntImmediate - This method tests to see if the node is a constant 189 /// operand. If so Imm will receive the 32-bit value. 190 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 191 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 192 Imm = C->getZExtValue(); 193 return true; 194 } 195 return false; 196 } 197 198 // isIntImmediate - This method tests to see if a constant operand. 199 // If so Imm will receive the value. 200 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 201 return isIntImmediate(N.getNode(), Imm); 202 } 203 204 // isOpcWithIntImmediate - This method tests to see if the node is a specific 205 // opcode and that it has a immediate integer right operand. 206 // If so Imm will receive the 32 bit value. 207 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 208 uint64_t &Imm) { 209 return N->getOpcode() == Opc && 210 isIntImmediate(N->getOperand(1).getNode(), Imm); 211 } 212 213 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 214 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 215 switch(ConstraintID) { 216 default: 217 llvm_unreachable("Unexpected asm memory constraint"); 218 case InlineAsm::Constraint_i: 219 case InlineAsm::Constraint_m: 220 case InlineAsm::Constraint_Q: 221 // Require the address to be in a register. 
That is safe for all AArch64 222 // variants and it is hard to do anything much smarter without knowing 223 // how the operand is used. 224 OutOps.push_back(Op); 225 return false; 226 } 227 return true; 228 } 229 230 /// SelectArithImmed - Select an immediate value that can be represented as 231 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 232 /// Val set to the 12-bit value and Shift set to the shifter operand. 233 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 234 SDValue &Shift) { 235 // This function is called from the addsub_shifted_imm ComplexPattern, 236 // which lists [imm] as the list of opcode it's interested in, however 237 // we still need to check whether the operand is actually an immediate 238 // here because the ComplexPattern opcode list is only used in 239 // root-level opcode matching. 240 if (!isa<ConstantSDNode>(N.getNode())) 241 return false; 242 243 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 244 unsigned ShiftAmt; 245 246 if (Immed >> 12 == 0) { 247 ShiftAmt = 0; 248 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 249 ShiftAmt = 12; 250 Immed = Immed >> 12; 251 } else 252 return false; 253 254 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 255 Val = CurDAG->getTargetConstant(Immed, MVT::i32); 256 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); 257 return true; 258 } 259 260 /// SelectNegArithImmed - As above, but negates the value before trying to 261 /// select it. 262 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 263 SDValue &Shift) { 264 // This function is called from the addsub_shifted_imm ComplexPattern, 265 // which lists [imm] as the list of opcode it's interested in, however 266 // we still need to check whether the operand is actually an immediate 267 // here because the ComplexPattern opcode list is only used in 268 // root-level opcode matching. 269 if (!isa<ConstantSDNode>(N.getNode())) 270 return false; 271 272 // The immediate operand must be a 24-bit zero-extended immediate. 273 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 274 275 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 276 // have the opposite effect on the C flag, so this pattern mustn't match under 277 // those circumstances. 278 if (Immed == 0) 279 return false; 280 281 if (N.getValueType() == MVT::i32) 282 Immed = ~((uint32_t)Immed) + 1; 283 else 284 Immed = ~Immed + 1ULL; 285 if (Immed & 0xFFFFFFFFFF000000ULL) 286 return false; 287 288 Immed &= 0xFFFFFFULL; 289 return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); 290 } 291 292 /// getShiftTypeForNode - Translate a shift node to the corresponding 293 /// ShiftType value. 294 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 295 switch (N.getOpcode()) { 296 default: 297 return AArch64_AM::InvalidShiftExtend; 298 case ISD::SHL: 299 return AArch64_AM::LSL; 300 case ISD::SRL: 301 return AArch64_AM::LSR; 302 case ISD::SRA: 303 return AArch64_AM::ASR; 304 case ISD::ROTR: 305 return AArch64_AM::ROR; 306 } 307 } 308 309 /// \brief Determine whether it is worth to fold V into an extended register. 310 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 311 // it hurts if the value is used at least twice, unless we are optimizing 312 // for code size. 313 if (ForCodeSize || V.hasOneUse()) 314 return true; 315 return false; 316 } 317 318 /// SelectShiftedRegister - Select a "shifted register" operand. 
If the value 319 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 320 /// instructions allow the shifted register to be rotated, but the arithmetic 321 /// instructions do not. The AllowROR parameter specifies whether ROR is 322 /// supported. 323 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 324 SDValue &Reg, SDValue &Shift) { 325 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 326 if (ShType == AArch64_AM::InvalidShiftExtend) 327 return false; 328 if (!AllowROR && ShType == AArch64_AM::ROR) 329 return false; 330 331 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 332 unsigned BitSize = N.getValueType().getSizeInBits(); 333 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 334 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 335 336 Reg = N.getOperand(0); 337 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); 338 return isWorthFolding(N); 339 } 340 341 return false; 342 } 343 344 /// getExtendTypeForNode - Translate an extend node to the corresponding 345 /// ExtendType value. 346 static AArch64_AM::ShiftExtendType 347 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 348 if (N.getOpcode() == ISD::SIGN_EXTEND || 349 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 350 EVT SrcVT; 351 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 352 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 353 else 354 SrcVT = N.getOperand(0).getValueType(); 355 356 if (!IsLoadStore && SrcVT == MVT::i8) 357 return AArch64_AM::SXTB; 358 else if (!IsLoadStore && SrcVT == MVT::i16) 359 return AArch64_AM::SXTH; 360 else if (SrcVT == MVT::i32) 361 return AArch64_AM::SXTW; 362 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 363 364 return AArch64_AM::InvalidShiftExtend; 365 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 366 N.getOpcode() == ISD::ANY_EXTEND) { 367 EVT SrcVT = N.getOperand(0).getValueType(); 368 if (!IsLoadStore && SrcVT == MVT::i8) 369 return AArch64_AM::UXTB; 370 else if (!IsLoadStore && SrcVT == MVT::i16) 371 return AArch64_AM::UXTH; 372 else if (SrcVT == MVT::i32) 373 return AArch64_AM::UXTW; 374 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 375 376 return AArch64_AM::InvalidShiftExtend; 377 } else if (N.getOpcode() == ISD::AND) { 378 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 379 if (!CSD) 380 return AArch64_AM::InvalidShiftExtend; 381 uint64_t AndMask = CSD->getZExtValue(); 382 383 switch (AndMask) { 384 default: 385 return AArch64_AM::InvalidShiftExtend; 386 case 0xFF: 387 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 388 case 0xFFFF: 389 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 390 case 0xFFFFFFFF: 391 return AArch64_AM::UXTW; 392 } 393 } 394 395 return AArch64_AM::InvalidShiftExtend; 396 } 397 398 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 
399 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { 400 if (DL->getOpcode() != AArch64ISD::DUPLANE16 && 401 DL->getOpcode() != AArch64ISD::DUPLANE32) 402 return false; 403 404 SDValue SV = DL->getOperand(0); 405 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) 406 return false; 407 408 SDValue EV = SV.getOperand(1); 409 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) 410 return false; 411 412 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); 413 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); 414 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); 415 LaneOp = EV.getOperand(0); 416 417 return true; 418 } 419 420 // Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a 421 // high lane extract. 422 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, 423 SDValue &LaneOp, int &LaneIdx) { 424 425 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { 426 std::swap(Op0, Op1); 427 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) 428 return false; 429 } 430 StdOp = Op1; 431 return true; 432 } 433 434 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand 435 /// is a lane in the upper half of a 128-bit vector. Recognize and select this 436 /// so that we don't emit unnecessary lane extracts. 437 SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { 438 SDValue Op0 = N->getOperand(0); 439 SDValue Op1 = N->getOperand(1); 440 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 441 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. 442 int LaneIdx = -1; // Will hold the lane index. 443 444 if (Op1.getOpcode() != ISD::MUL || 445 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 446 LaneIdx)) { 447 std::swap(Op0, Op1); 448 if (Op1.getOpcode() != ISD::MUL || 449 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 450 LaneIdx)) 451 return nullptr; 452 } 453 454 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); 455 456 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; 457 458 unsigned MLAOpc = ~0U; 459 460 switch (N->getSimpleValueType(0).SimpleTy) { 461 default: 462 llvm_unreachable("Unrecognized MLA."); 463 case MVT::v4i16: 464 MLAOpc = AArch64::MLAv4i16_indexed; 465 break; 466 case MVT::v8i16: 467 MLAOpc = AArch64::MLAv8i16_indexed; 468 break; 469 case MVT::v2i32: 470 MLAOpc = AArch64::MLAv2i32_indexed; 471 break; 472 case MVT::v4i32: 473 MLAOpc = AArch64::MLAv4i32_indexed; 474 break; 475 } 476 477 return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); 478 } 479 480 SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { 481 SDValue SMULLOp0; 482 SDValue SMULLOp1; 483 int LaneIdx; 484 485 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, 486 LaneIdx)) 487 return nullptr; 488 489 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); 490 491 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; 492 493 unsigned SMULLOpc = ~0U; 494 495 if (IntNo == Intrinsic::aarch64_neon_smull) { 496 switch (N->getSimpleValueType(0).SimpleTy) { 497 default: 498 llvm_unreachable("Unrecognized SMULL."); 499 case MVT::v4i32: 500 SMULLOpc = AArch64::SMULLv4i16_indexed; 501 break; 502 case MVT::v2i64: 503 SMULLOpc = AArch64::SMULLv2i32_indexed; 504 break; 505 } 506 } else if (IntNo == Intrinsic::aarch64_neon_umull) { 507 switch (N->getSimpleValueType(0).SimpleTy) { 508 default: 509 
llvm_unreachable("Unrecognized SMULL."); 510 case MVT::v4i32: 511 SMULLOpc = AArch64::UMULLv4i16_indexed; 512 break; 513 case MVT::v2i64: 514 SMULLOpc = AArch64::UMULLv2i32_indexed; 515 break; 516 } 517 } else 518 llvm_unreachable("Unrecognized intrinsic."); 519 520 return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); 521 } 522 523 /// Instructions that accept extend modifiers like UXTW expect the register 524 /// being extended to be a GPR32, but the incoming DAG might be acting on a 525 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 526 /// this is the case. 527 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 528 if (N.getValueType() == MVT::i32) 529 return N; 530 531 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 532 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 533 SDLoc(N), MVT::i32, N, SubReg); 534 return SDValue(Node, 0); 535 } 536 537 538 /// SelectArithExtendedRegister - Select a "extended register" operand. This 539 /// operand folds in an extend followed by an optional left shift. 540 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 541 SDValue &Shift) { 542 unsigned ShiftVal = 0; 543 AArch64_AM::ShiftExtendType Ext; 544 545 if (N.getOpcode() == ISD::SHL) { 546 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 547 if (!CSD) 548 return false; 549 ShiftVal = CSD->getZExtValue(); 550 if (ShiftVal > 4) 551 return false; 552 553 Ext = getExtendTypeForNode(N.getOperand(0)); 554 if (Ext == AArch64_AM::InvalidShiftExtend) 555 return false; 556 557 Reg = N.getOperand(0).getOperand(0); 558 } else { 559 Ext = getExtendTypeForNode(N); 560 if (Ext == AArch64_AM::InvalidShiftExtend) 561 return false; 562 563 Reg = N.getOperand(0); 564 } 565 566 // AArch64 mandates that the RHS of the operation must use the smallest 567 // register classs that could contain the size being extended from. Thus, 568 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 569 // there might not be an actual 32-bit value in the program. We can 570 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 571 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 572 Reg = narrowIfNeeded(CurDAG, Reg); 573 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); 574 return isWorthFolding(N); 575 } 576 577 /// If there's a use of this ADDlow that's not itself a load/store then we'll 578 /// need to create a real ADD instruction from it anyway and there's no point in 579 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's 580 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 581 /// leads to duplaicated ADRP instructions. 582 static bool isWorthFoldingADDlow(SDValue N) { 583 for (auto Use : N->uses()) { 584 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 585 Use->getOpcode() != ISD::ATOMIC_LOAD && 586 Use->getOpcode() != ISD::ATOMIC_STORE) 587 return false; 588 589 // ldar and stlr have much more restrictive addressing modes (just a 590 // register). 591 if (cast<MemSDNode>(Use)->getOrdering() > Monotonic) 592 return false; 593 } 594 595 return true; 596 } 597 598 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 599 /// immediate" address. The "Size" argument is the size in bytes of the memory 600 /// reference, which determines the scale. 
601 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 602 SDValue &Base, SDValue &OffImm) { 603 const TargetLowering *TLI = getTargetLowering(); 604 if (N.getOpcode() == ISD::FrameIndex) { 605 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 606 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 607 OffImm = CurDAG->getTargetConstant(0, MVT::i64); 608 return true; 609 } 610 611 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 612 GlobalAddressSDNode *GAN = 613 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 614 Base = N.getOperand(0); 615 OffImm = N.getOperand(1); 616 if (!GAN) 617 return true; 618 619 const GlobalValue *GV = GAN->getGlobal(); 620 unsigned Alignment = GV->getAlignment(); 621 const DataLayout *DL = TLI->getDataLayout(); 622 Type *Ty = GV->getType()->getElementType(); 623 if (Alignment == 0 && Ty->isSized()) 624 Alignment = DL->getABITypeAlignment(Ty); 625 626 if (Alignment >= Size) 627 return true; 628 } 629 630 if (CurDAG->isBaseWithConstantOffset(N)) { 631 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 632 int64_t RHSC = (int64_t)RHS->getZExtValue(); 633 unsigned Scale = Log2_32(Size); 634 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 635 Base = N.getOperand(0); 636 if (Base.getOpcode() == ISD::FrameIndex) { 637 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 638 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 639 } 640 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); 641 return true; 642 } 643 } 644 } 645 646 // Before falling back to our general case, check if the unscaled 647 // instructions can handle this. If so, that's preferable. 648 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 649 return false; 650 651 // Base only. The address will be materialized into a register before 652 // the memory is accessed. 653 // add x0, Xbase, #offset 654 // ldr x0, [x0] 655 Base = N; 656 OffImm = CurDAG->getTargetConstant(0, MVT::i64); 657 return true; 658 } 659 660 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 661 /// immediate" address. This should only match when there is an offset that 662 /// is not valid for a scaled immediate addressing mode. The "Size" argument 663 /// is the size in bytes of the memory reference, which is needed here to know 664 /// what is valid for a scaled immediate. 665 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 666 SDValue &Base, 667 SDValue &OffImm) { 668 if (!CurDAG->isBaseWithConstantOffset(N)) 669 return false; 670 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 671 int64_t RHSC = RHS->getSExtValue(); 672 // If the offset is valid as a scaled immediate, don't match here. 
673 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 674 RHSC < (0x1000 << Log2_32(Size))) 675 return false; 676 if (RHSC >= -256 && RHSC < 256) { 677 Base = N.getOperand(0); 678 if (Base.getOpcode() == ISD::FrameIndex) { 679 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 680 const TargetLowering *TLI = getTargetLowering(); 681 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 682 } 683 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); 684 return true; 685 } 686 } 687 return false; 688 } 689 690 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 691 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 692 SDValue ImpDef = SDValue( 693 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), 694 0); 695 MachineSDNode *Node = CurDAG->getMachineNode( 696 TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); 697 return SDValue(Node, 0); 698 } 699 700 /// \brief Check if the given SHL node (\p N), can be used to form an 701 /// extended register for an addressing mode. 702 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 703 bool WantExtend, SDValue &Offset, 704 SDValue &SignExtend) { 705 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 706 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 707 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 708 return false; 709 710 if (WantExtend) { 711 AArch64_AM::ShiftExtendType Ext = 712 getExtendTypeForNode(N.getOperand(0), true); 713 if (Ext == AArch64_AM::InvalidShiftExtend) 714 return false; 715 716 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 717 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); 718 } else { 719 Offset = N.getOperand(0); 720 SignExtend = CurDAG->getTargetConstant(0, MVT::i32); 721 } 722 723 unsigned LegalShiftVal = Log2_32(Size); 724 unsigned ShiftVal = CSD->getZExtValue(); 725 726 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 727 return false; 728 729 if (isWorthFolding(N)) 730 return true; 731 732 return false; 733 } 734 735 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 736 SDValue &Base, SDValue &Offset, 737 SDValue &SignExtend, 738 SDValue &DoShift) { 739 if (N.getOpcode() != ISD::ADD) 740 return false; 741 SDValue LHS = N.getOperand(0); 742 SDValue RHS = N.getOperand(1); 743 744 // We don't want to match immediate adds here, because they are better lowered 745 // to the register-immediate addressing modes. 746 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 747 return false; 748 749 // Check if this particular node is reused in any non-memory related 750 // operation. If yes, do not try to fold this node into the address 751 // computation, since the computation will be kept. 752 const SDNode *Node = N.getNode(); 753 for (SDNode *UI : Node->uses()) { 754 if (!isa<MemSDNode>(*UI)) 755 return false; 756 } 757 758 // Remember if it is worth folding N when it produces extended register. 759 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 760 761 // Try to match a shifted extend on the RHS. 762 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 763 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 764 Base = LHS; 765 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 766 return true; 767 } 768 769 // Try to match a shifted extend on the LHS. 
770 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 771 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 772 Base = RHS; 773 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 774 return true; 775 } 776 777 // There was no shift, whatever else we find. 778 DoShift = CurDAG->getTargetConstant(false, MVT::i32); 779 780 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 781 // Try to match an unshifted extend on the LHS. 782 if (IsExtendedRegisterWorthFolding && 783 (Ext = getExtendTypeForNode(LHS, true)) != 784 AArch64_AM::InvalidShiftExtend) { 785 Base = RHS; 786 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 787 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); 788 if (isWorthFolding(LHS)) 789 return true; 790 } 791 792 // Try to match an unshifted extend on the RHS. 793 if (IsExtendedRegisterWorthFolding && 794 (Ext = getExtendTypeForNode(RHS, true)) != 795 AArch64_AM::InvalidShiftExtend) { 796 Base = LHS; 797 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 798 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); 799 if (isWorthFolding(RHS)) 800 return true; 801 } 802 803 return false; 804 } 805 806 // Check if the given immediate is preferred by ADD. If an immediate can be 807 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 808 // encoded by one MOVZ, return true. 809 static bool isPreferredADD(int64_t ImmOff) { 810 // Constant in [0x0, 0xfff] can be encoded in ADD. 811 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 812 return true; 813 // Check if it can be encoded in an "ADD LSL #12". 814 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 815 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 816 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 817 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 818 return false; 819 } 820 821 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 822 SDValue &Base, SDValue &Offset, 823 SDValue &SignExtend, 824 SDValue &DoShift) { 825 if (N.getOpcode() != ISD::ADD) 826 return false; 827 SDValue LHS = N.getOperand(0); 828 SDValue RHS = N.getOperand(1); 829 830 // Check if this particular node is reused in any non-memory related 831 // operation. If yes, do not try to fold this node into the address 832 // computation, since the computation will be kept. 833 const SDNode *Node = N.getNode(); 834 for (SDNode *UI : Node->uses()) { 835 if (!isa<MemSDNode>(*UI)) 836 return false; 837 } 838 839 // Watch out if RHS is a wide immediate, it can not be selected into 840 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 841 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate 842 // instructions like: 843 // MOV X0, WideImmediate 844 // ADD X1, BaseReg, X0 845 // LDR X2, [X1, 0] 846 // For such situation, using [BaseReg, XReg] addressing mode can save one 847 // ADD/SUB: 848 // MOV X0, WideImmediate 849 // LDR X2, [BaseReg, X0] 850 if (isa<ConstantSDNode>(RHS)) { 851 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); 852 unsigned Scale = Log2_32(Size); 853 // Skip the immediate can be seleced by load/store addressing mode. 854 // Also skip the immediate can be encoded by a single ADD (SUB is also 855 // checked by using -ImmOff). 
856 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || 857 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 858 return false; 859 860 SDLoc DL(N.getNode()); 861 SDValue Ops[] = { RHS }; 862 SDNode *MOVI = 863 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 864 SDValue MOVIV = SDValue(MOVI, 0); 865 // This ADD of two X register will be selected into [Reg+Reg] mode. 866 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 867 } 868 869 // Remember if it is worth folding N when it produces extended register. 870 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 871 872 // Try to match a shifted extend on the RHS. 873 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 874 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 875 Base = LHS; 876 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 877 return true; 878 } 879 880 // Try to match a shifted extend on the LHS. 881 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 882 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 883 Base = RHS; 884 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 885 return true; 886 } 887 888 // Match any non-shifted, non-extend, non-immediate add expression. 889 Base = LHS; 890 Offset = RHS; 891 SignExtend = CurDAG->getTargetConstant(false, MVT::i32); 892 DoShift = CurDAG->getTargetConstant(false, MVT::i32); 893 // Reg1 + Reg2 is free: no check needed. 894 return true; 895 } 896 897 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 898 static const unsigned RegClassIDs[] = { 899 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 900 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 901 AArch64::dsub2, AArch64::dsub3}; 902 903 return createTuple(Regs, RegClassIDs, SubRegs); 904 } 905 906 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 907 static const unsigned RegClassIDs[] = { 908 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 909 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 910 AArch64::qsub2, AArch64::qsub3}; 911 912 return createTuple(Regs, RegClassIDs, SubRegs); 913 } 914 915 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 916 const unsigned RegClassIDs[], 917 const unsigned SubRegs[]) { 918 // There's no special register-class for a vector-list of 1 element: it's just 919 // a vector. 920 if (Regs.size() == 1) 921 return Regs[0]; 922 923 assert(Regs.size() >= 2 && Regs.size() <= 4); 924 925 SDLoc DL(Regs[0].getNode()); 926 927 SmallVector<SDValue, 4> Ops; 928 929 // First operand of REG_SEQUENCE is the desired RegClass. 930 Ops.push_back( 931 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); 932 933 // Then we get pairs of source & subregister-position for the components. 934 for (unsigned i = 0; i < Regs.size(); ++i) { 935 Ops.push_back(Regs[i]); 936 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); 937 } 938 939 SDNode *N = 940 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 941 return SDValue(N, 0); 942 } 943 944 SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, 945 unsigned Opc, bool isExt) { 946 SDLoc dl(N); 947 EVT VT = N->getValueType(0); 948 949 unsigned ExtOff = isExt; 950 951 // Form a REG_SEQUENCE to force register allocation. 
952 unsigned Vec0Off = ExtOff + 1; 953 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 954 N->op_begin() + Vec0Off + NumVecs); 955 SDValue RegSeq = createQTuple(Regs); 956 957 SmallVector<SDValue, 6> Ops; 958 if (isExt) 959 Ops.push_back(N->getOperand(1)); 960 Ops.push_back(RegSeq); 961 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 962 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 963 } 964 965 SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { 966 LoadSDNode *LD = cast<LoadSDNode>(N); 967 if (LD->isUnindexed()) 968 return nullptr; 969 EVT VT = LD->getMemoryVT(); 970 EVT DstVT = N->getValueType(0); 971 ISD::MemIndexedMode AM = LD->getAddressingMode(); 972 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 973 974 // We're not doing validity checking here. That was done when checking 975 // if we should mark the load as indexed or not. We're just selecting 976 // the right instruction. 977 unsigned Opcode = 0; 978 979 ISD::LoadExtType ExtType = LD->getExtensionType(); 980 bool InsertTo64 = false; 981 if (VT == MVT::i64) 982 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 983 else if (VT == MVT::i32) { 984 if (ExtType == ISD::NON_EXTLOAD) 985 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 986 else if (ExtType == ISD::SEXTLOAD) 987 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 988 else { 989 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 990 InsertTo64 = true; 991 // The result of the load is only i32. It's the subreg_to_reg that makes 992 // it into an i64. 993 DstVT = MVT::i32; 994 } 995 } else if (VT == MVT::i16) { 996 if (ExtType == ISD::SEXTLOAD) { 997 if (DstVT == MVT::i64) 998 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 999 else 1000 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1001 } else { 1002 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1003 InsertTo64 = DstVT == MVT::i64; 1004 // The result of the load is only i32. It's the subreg_to_reg that makes 1005 // it into an i64. 1006 DstVT = MVT::i32; 1007 } 1008 } else if (VT == MVT::i8) { 1009 if (ExtType == ISD::SEXTLOAD) { 1010 if (DstVT == MVT::i64) 1011 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1012 else 1013 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1014 } else { 1015 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1016 InsertTo64 = DstVT == MVT::i64; 1017 // The result of the load is only i32. It's the subreg_to_reg that makes 1018 // it into an i64. 1019 DstVT = MVT::i32; 1020 } 1021 } else if (VT == MVT::f32) { 1022 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1023 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1024 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1025 } else if (VT.is128BitVector()) { 1026 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 1027 } else 1028 return nullptr; 1029 SDValue Chain = LD->getChain(); 1030 SDValue Base = LD->getBasePtr(); 1031 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1032 int OffsetVal = (int)OffsetOp->getZExtValue(); 1033 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); 1034 SDValue Ops[] = { Base, Offset, Chain }; 1035 SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT, 1036 MVT::Other, Ops); 1037 // Either way, we're replacing the node, so tell the caller that. 
1038 Done = true; 1039 SDValue LoadedVal = SDValue(Res, 1); 1040 if (InsertTo64) { 1041 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 1042 LoadedVal = 1043 SDValue(CurDAG->getMachineNode( 1044 AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64, 1045 CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg), 1046 0); 1047 } 1048 1049 ReplaceUses(SDValue(N, 0), LoadedVal); 1050 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1051 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1052 1053 return nullptr; 1054 } 1055 1056 SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, 1057 unsigned Opc, unsigned SubRegIdx) { 1058 SDLoc dl(N); 1059 EVT VT = N->getValueType(0); 1060 SDValue Chain = N->getOperand(0); 1061 1062 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1063 Chain}; 1064 1065 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1066 1067 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1068 SDValue SuperReg = SDValue(Ld, 0); 1069 for (unsigned i = 0; i < NumVecs; ++i) 1070 ReplaceUses(SDValue(N, i), 1071 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1072 1073 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1074 return nullptr; 1075 } 1076 1077 SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1078 unsigned Opc, unsigned SubRegIdx) { 1079 SDLoc dl(N); 1080 EVT VT = N->getValueType(0); 1081 SDValue Chain = N->getOperand(0); 1082 1083 SDValue Ops[] = {N->getOperand(1), // Mem operand 1084 N->getOperand(2), // Incremental 1085 Chain}; 1086 1087 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1088 MVT::Untyped, MVT::Other}; 1089 1090 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1091 1092 // Update uses of write back register 1093 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1094 1095 // Update uses of vector list 1096 SDValue SuperReg = SDValue(Ld, 1); 1097 if (NumVecs == 1) 1098 ReplaceUses(SDValue(N, 0), SuperReg); 1099 else 1100 for (unsigned i = 0; i < NumVecs; ++i) 1101 ReplaceUses(SDValue(N, i), 1102 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1103 1104 // Update the chain 1105 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1106 return nullptr; 1107 } 1108 1109 SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1110 unsigned Opc) { 1111 SDLoc dl(N); 1112 EVT VT = N->getOperand(2)->getValueType(0); 1113 1114 // Form a REG_SEQUENCE to force register allocation. 1115 bool Is128Bit = VT.getSizeInBits() == 128; 1116 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1117 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1118 1119 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1120 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1121 1122 return St; 1123 } 1124 1125 SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1126 unsigned Opc) { 1127 SDLoc dl(N); 1128 EVT VT = N->getOperand(2)->getValueType(0); 1129 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1130 MVT::Other}; // Type for the Chain 1131 1132 // Form a REG_SEQUENCE to force register allocation. 1133 bool Is128Bit = VT.getSizeInBits() == 128; 1134 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1135 SDValue RegSeq = Is128Bit ? 
createQTuple(Regs) : createDTuple(Regs); 1136 1137 SDValue Ops[] = {RegSeq, 1138 N->getOperand(NumVecs + 1), // base register 1139 N->getOperand(NumVecs + 2), // Incremental 1140 N->getOperand(0)}; // Chain 1141 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1142 1143 return St; 1144 } 1145 1146 namespace { 1147 /// WidenVector - Given a value in the V64 register class, produce the 1148 /// equivalent value in the V128 register class. 1149 class WidenVector { 1150 SelectionDAG &DAG; 1151 1152 public: 1153 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1154 1155 SDValue operator()(SDValue V64Reg) { 1156 EVT VT = V64Reg.getValueType(); 1157 unsigned NarrowSize = VT.getVectorNumElements(); 1158 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1159 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1160 SDLoc DL(V64Reg); 1161 1162 SDValue Undef = 1163 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1164 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1165 } 1166 }; 1167 } // namespace 1168 1169 /// NarrowVector - Given a value in the V128 register class, produce the 1170 /// equivalent value in the V64 register class. 1171 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1172 EVT VT = V128Reg.getValueType(); 1173 unsigned WideSize = VT.getVectorNumElements(); 1174 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1175 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1176 1177 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1178 V128Reg); 1179 } 1180 1181 SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1182 unsigned Opc) { 1183 SDLoc dl(N); 1184 EVT VT = N->getValueType(0); 1185 bool Narrow = VT.getSizeInBits() == 64; 1186 1187 // Form a REG_SEQUENCE to force register allocation. 1188 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1189 1190 if (Narrow) 1191 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 1192 WidenVector(*CurDAG)); 1193 1194 SDValue RegSeq = createQTuple(Regs); 1195 1196 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1197 1198 unsigned LaneNo = 1199 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1200 1201 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64), 1202 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1203 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1204 SDValue SuperReg = SDValue(Ld, 0); 1205 1206 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1207 static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, 1208 AArch64::qsub3 }; 1209 for (unsigned i = 0; i < NumVecs; ++i) { 1210 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1211 if (Narrow) 1212 NV = NarrowVector(NV, *CurDAG); 1213 ReplaceUses(SDValue(N, i), NV); 1214 } 1215 1216 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1217 1218 return Ld; 1219 } 1220 1221 SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1222 unsigned Opc) { 1223 SDLoc dl(N); 1224 EVT VT = N->getValueType(0); 1225 bool Narrow = VT.getSizeInBits() == 64; 1226 1227 // Form a REG_SEQUENCE to force register allocation. 
1228 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1229 1230 if (Narrow) 1231 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 1232 WidenVector(*CurDAG)); 1233 1234 SDValue RegSeq = createQTuple(Regs); 1235 1236 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1237 MVT::Untyped, MVT::Other}; 1238 1239 unsigned LaneNo = 1240 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1241 1242 SDValue Ops[] = {RegSeq, 1243 CurDAG->getTargetConstant(LaneNo, MVT::i64), // Lane Number 1244 N->getOperand(NumVecs + 2), // Base register 1245 N->getOperand(NumVecs + 3), // Incremental 1246 N->getOperand(0)}; 1247 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1248 1249 // Update uses of the write back register 1250 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1251 1252 // Update uses of the vector list 1253 SDValue SuperReg = SDValue(Ld, 1); 1254 if (NumVecs == 1) { 1255 ReplaceUses(SDValue(N, 0), 1256 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1257 } else { 1258 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1259 static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, 1260 AArch64::qsub3 }; 1261 for (unsigned i = 0; i < NumVecs; ++i) { 1262 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1263 SuperReg); 1264 if (Narrow) 1265 NV = NarrowVector(NV, *CurDAG); 1266 ReplaceUses(SDValue(N, i), NV); 1267 } 1268 } 1269 1270 // Update the Chain 1271 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1272 1273 return Ld; 1274 } 1275 1276 SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1277 unsigned Opc) { 1278 SDLoc dl(N); 1279 EVT VT = N->getOperand(2)->getValueType(0); 1280 bool Narrow = VT.getSizeInBits() == 64; 1281 1282 // Form a REG_SEQUENCE to force register allocation. 1283 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1284 1285 if (Narrow) 1286 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 1287 WidenVector(*CurDAG)); 1288 1289 SDValue RegSeq = createQTuple(Regs); 1290 1291 unsigned LaneNo = 1292 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1293 1294 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64), 1295 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1296 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1297 1298 // Transfer memoperands. 1299 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1300 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1301 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 1302 1303 return St; 1304 } 1305 1306 SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1307 unsigned Opc) { 1308 SDLoc dl(N); 1309 EVT VT = N->getOperand(2)->getValueType(0); 1310 bool Narrow = VT.getSizeInBits() == 64; 1311 1312 // Form a REG_SEQUENCE to force register allocation. 
1313 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1314 1315 if (Narrow) 1316 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 1317 WidenVector(*CurDAG)); 1318 1319 SDValue RegSeq = createQTuple(Regs); 1320 1321 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1322 MVT::Other}; 1323 1324 unsigned LaneNo = 1325 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1326 1327 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64), 1328 N->getOperand(NumVecs + 2), // Base Register 1329 N->getOperand(NumVecs + 3), // Incremental 1330 N->getOperand(0)}; 1331 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1332 1333 // Transfer memoperands. 1334 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1335 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1336 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 1337 1338 return St; 1339 } 1340 1341 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1342 unsigned &Opc, SDValue &Opd0, 1343 unsigned &LSB, unsigned &MSB, 1344 unsigned NumberOfIgnoredLowBits, 1345 bool BiggerPattern) { 1346 assert(N->getOpcode() == ISD::AND && 1347 "N must be a AND operation to call this function"); 1348 1349 EVT VT = N->getValueType(0); 1350 1351 // Here we can test the type of VT and return false when the type does not 1352 // match, but since it is done prior to that call in the current context 1353 // we turned that into an assert to avoid redundant code. 1354 assert((VT == MVT::i32 || VT == MVT::i64) && 1355 "Type checking must have been done before calling this function"); 1356 1357 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1358 // changed the AND node to a 32-bit mask operation. We'll have to 1359 // undo that as part of the transform here if we want to catch all 1360 // the opportunities. 1361 // Currently the NumberOfIgnoredLowBits argument helps to recover 1362 // form these situations when matching bigger pattern (bitfield insert). 1363 1364 // For unsigned extracts, check for a shift right and mask 1365 uint64_t And_imm = 0; 1366 if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) 1367 return false; 1368 1369 const SDNode *Op0 = N->getOperand(0).getNode(); 1370 1371 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1372 // simplified. Try to undo that 1373 And_imm |= (1 << NumberOfIgnoredLowBits) - 1; 1374 1375 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1376 if (And_imm & (And_imm + 1)) 1377 return false; 1378 1379 bool ClampMSB = false; 1380 uint64_t Srl_imm = 0; 1381 // Handle the SRL + ANY_EXTEND case. 1382 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1383 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { 1384 // Extend the incoming operand of the SRL to 64-bit. 1385 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1386 // Make sure to clamp the MSB so that we preserve the semantics of the 1387 // original operations. 1388 ClampMSB = true; 1389 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1390 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1391 Srl_imm)) { 1392 // If the shift result was truncated, we can still combine them. 1393 Opd0 = Op0->getOperand(0).getOperand(0); 1394 1395 // Use the type of SRL node. 
1396 VT = Opd0->getValueType(0); 1397 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { 1398 Opd0 = Op0->getOperand(0); 1399 } else if (BiggerPattern) { 1400 // Let's pretend a 0 shift right has been performed. 1401 // The resulting code will be at least as good as the original one 1402 // plus it may expose more opportunities for bitfield insert pattern. 1403 // FIXME: Currently we limit this to the bigger pattern, because 1404 // some optimizations expect AND and not UBFM 1405 Opd0 = N->getOperand(0); 1406 } else 1407 return false; 1408 1409 // Bail out on large immediates. This happens when no proper 1410 // combining/constant folding was performed. 1411 if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) { 1412 DEBUG((dbgs() << N 1413 << ": Found large shift immediate, this should not happen\n")); 1414 return false; 1415 } 1416 1417 LSB = Srl_imm; 1418 MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm) 1419 : countTrailingOnes<uint64_t>(And_imm)) - 1420 1; 1421 if (ClampMSB) 1422 // Since we're moving the extend before the right shift operation, we need 1423 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1424 // the zeros which would get shifted in with the original right shift 1425 // operation. 1426 MSB = MSB > 31 ? 31 : MSB; 1427 1428 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1429 return true; 1430 } 1431 1432 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1433 SDValue &Opd0, unsigned &LSB, 1434 unsigned &MSB) { 1435 // We are looking for the following pattern which basically extracts several 1436 // continuous bits from the source value and places it from the LSB of the 1437 // destination value, all other bits of the destination value or set to zero: 1438 // 1439 // Value2 = AND Value, MaskImm 1440 // SRL Value2, ShiftImm 1441 // 1442 // with MaskImm >> ShiftImm to search for the bit width. 1443 // 1444 // This gets selected into a single UBFM: 1445 // 1446 // UBFM Value, ShiftImm, BitWide + Srl_imm -1 1447 // 1448 1449 if (N->getOpcode() != ISD::SRL) 1450 return false; 1451 1452 uint64_t And_mask = 0; 1453 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) 1454 return false; 1455 1456 Opd0 = N->getOperand(0).getOperand(0); 1457 1458 uint64_t Srl_imm = 0; 1459 if (!isIntImmediate(N->getOperand(1), Srl_imm)) 1460 return false; 1461 1462 // Check whether we really have several bits extract here. 1463 unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm)); 1464 if (BitWide && isMask_64(And_mask >> Srl_imm)) { 1465 if (N->getValueType(0) == MVT::i32) 1466 Opc = AArch64::UBFMWri; 1467 else 1468 Opc = AArch64::UBFMXri; 1469 1470 LSB = Srl_imm; 1471 MSB = BitWide + Srl_imm - 1; 1472 return true; 1473 } 1474 1475 return false; 1476 } 1477 1478 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1479 unsigned &LSB, unsigned &MSB, 1480 bool BiggerPattern) { 1481 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1482 "N must be a SHR/SRA operation to call this function"); 1483 1484 EVT VT = N->getValueType(0); 1485 1486 // Here we can test the type of VT and return false when the type does not 1487 // match, but since it is done prior to that call in the current context 1488 // we turned that into an assert to avoid redundant code. 
1489 assert((VT == MVT::i32 || VT == MVT::i64) && 1490 "Type checking must have been done before calling this function"); 1491 1492 // Check for AND + SRL doing several bits extract. 1493 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) 1494 return true; 1495 1496 // we're looking for a shift of a shift 1497 uint64_t Shl_imm = 0; 1498 uint64_t Trunc_bits = 0; 1499 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 1500 Opd0 = N->getOperand(0).getOperand(0); 1501 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1502 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1503 // We are looking for a shift of truncate. Truncate from i64 to i32 could 1504 // be considered as setting high 32 bits as zero. Our strategy here is to 1505 // always generate 64bit UBFM. This consistency will help the CSE pass 1506 // later find more redundancy. 1507 Opd0 = N->getOperand(0).getOperand(0); 1508 Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1509 VT = Opd0->getValueType(0); 1510 assert(VT == MVT::i64 && "the promoted type should be i64"); 1511 } else if (BiggerPattern) { 1512 // Let's pretend a 0 shift left has been performed. 1513 // FIXME: Currently we limit this to the bigger pattern case, 1514 // because some optimizations expect AND and not UBFM 1515 Opd0 = N->getOperand(0); 1516 } else 1517 return false; 1518 1519 // Missing combines/constant folding may have left us with strange 1520 // constants. 1521 if (Shl_imm >= VT.getSizeInBits()) { 1522 DEBUG((dbgs() << N 1523 << ": Found large shift immediate, this should not happen\n")); 1524 return false; 1525 } 1526 1527 uint64_t Srl_imm = 0; 1528 if (!isIntImmediate(N->getOperand(1), Srl_imm)) 1529 return false; 1530 1531 assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && 1532 "bad amount in shift node!"); 1533 // Note: The width operand is encoded as width-1. 1534 unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1; 1535 int sLSB = Srl_imm - Shl_imm; 1536 if (sLSB < 0) 1537 return false; 1538 LSB = sLSB; 1539 MSB = LSB + Width; 1540 // SRA requires a signed extraction 1541 if (VT == MVT::i32) 1542 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 1543 else 1544 Opc = N->getOpcode() == ISD::SRA ? 
AArch64::SBFMXri : AArch64::UBFMXri; 1545 return true; 1546 } 1547 1548 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 1549 SDValue &Opd0, unsigned &LSB, unsigned &MSB, 1550 unsigned NumberOfIgnoredLowBits = 0, 1551 bool BiggerPattern = false) { 1552 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 1553 return false; 1554 1555 switch (N->getOpcode()) { 1556 default: 1557 if (!N->isMachineOpcode()) 1558 return false; 1559 break; 1560 case ISD::AND: 1561 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, 1562 NumberOfIgnoredLowBits, BiggerPattern); 1563 case ISD::SRL: 1564 case ISD::SRA: 1565 return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); 1566 } 1567 1568 unsigned NOpc = N->getMachineOpcode(); 1569 switch (NOpc) { 1570 default: 1571 return false; 1572 case AArch64::SBFMWri: 1573 case AArch64::UBFMWri: 1574 case AArch64::SBFMXri: 1575 case AArch64::UBFMXri: 1576 Opc = NOpc; 1577 Opd0 = N->getOperand(0); 1578 LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 1579 MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 1580 return true; 1581 } 1582 // Unreachable 1583 return false; 1584 } 1585 1586 SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { 1587 unsigned Opc, LSB, MSB; 1588 SDValue Opd0; 1589 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) 1590 return nullptr; 1591 1592 EVT VT = N->getValueType(0); 1593 1594 // If the bitfield extract operation is 64-bit but the original type is 1595 // 32-bit, we need to add one EXTRACT_SUBREG. 1596 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 1597 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), 1598 CurDAG->getTargetConstant(MSB, MVT::i64)}; 1599 1600 SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); 1601 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 1602 MachineSDNode *Node = 1603 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, 1604 SDValue(BFM, 0), SubReg); 1605 return Node; 1606 } 1607 1608 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT), 1609 CurDAG->getTargetConstant(MSB, VT)}; 1610 return CurDAG->SelectNodeTo(N, Opc, VT, Ops); 1611 } 1612 1613 /// Does DstMask form a complementary pair with the mask provided by 1614 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking, 1615 /// this asks whether DstMask zeroes precisely those bits that will be set by 1616 /// the other half. 1617 static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted, 1618 unsigned NumberOfIgnoredHighBits, EVT VT) { 1619 assert((VT == MVT::i32 || VT == MVT::i64) && 1620 "i32 or i64 mask type expected!"); 1621 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 1622 1623 APInt SignificantDstMask = APInt(BitWidth, DstMask); 1624 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 1625 1626 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 1627 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); 1628 } 1629 1630 // Look for bits that will be useful for later uses. 1631 // A bit is considered useless as soon as it is dropped and never used 1632 // before it has been dropped. 1633 // E.g., looking for the useful bits of x: 1634 // 1. y = x & 0x7 1635 // 2. z = y >> 2 1636 // After #1, the useful bits of x are 0x7; they then live through 1637 // y. 1638 // After #2, the useful bits of x are 0x4.
1639 // However, if x is used on an unpredicatable instruction, then all its bits 1640 // are useful. 1641 // E.g. 1642 // 1. y = x & 0x7 1643 // 2. z = y >> 2 1644 // 3. str x, [@x] 1645 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 1646 1647 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 1648 unsigned Depth) { 1649 uint64_t Imm = 1650 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1651 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 1652 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 1653 getUsefulBits(Op, UsefulBits, Depth + 1); 1654 } 1655 1656 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 1657 uint64_t Imm, uint64_t MSB, 1658 unsigned Depth) { 1659 // inherit the bitwidth value 1660 APInt OpUsefulBits(UsefulBits); 1661 OpUsefulBits = 1; 1662 1663 if (MSB >= Imm) { 1664 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); 1665 --OpUsefulBits; 1666 // The interesting part will be in the lower part of the result 1667 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1668 // The interesting part was starting at Imm in the argument 1669 OpUsefulBits = OpUsefulBits.shl(Imm); 1670 } else { 1671 OpUsefulBits = OpUsefulBits.shl(MSB + 1); 1672 --OpUsefulBits; 1673 // The interesting part will be shifted in the result 1674 OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); 1675 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1676 // The interesting part was at zero in the argument 1677 OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); 1678 } 1679 1680 UsefulBits &= OpUsefulBits; 1681 } 1682 1683 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 1684 unsigned Depth) { 1685 uint64_t Imm = 1686 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1687 uint64_t MSB = 1688 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1689 1690 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 1691 } 1692 1693 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 1694 unsigned Depth) { 1695 uint64_t ShiftTypeAndValue = 1696 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1697 APInt Mask(UsefulBits); 1698 Mask.clearAllBits(); 1699 Mask.flipAllBits(); 1700 1701 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 1702 // Shift Left 1703 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1704 Mask = Mask.shl(ShiftAmt); 1705 getUsefulBits(Op, Mask, Depth + 1); 1706 Mask = Mask.lshr(ShiftAmt); 1707 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 1708 // Shift Right 1709 // We do not handle AArch64_AM::ASR, because the sign will change the 1710 // number of useful bits 1711 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1712 Mask = Mask.lshr(ShiftAmt); 1713 getUsefulBits(Op, Mask, Depth + 1); 1714 Mask = Mask.shl(ShiftAmt); 1715 } else 1716 return; 1717 1718 UsefulBits &= Mask; 1719 } 1720 1721 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 1722 unsigned Depth) { 1723 uint64_t Imm = 1724 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1725 uint64_t MSB = 1726 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 1727 1728 if (Op.getOperand(1) == Orig) 1729 return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 1730 1731 APInt OpUsefulBits(UsefulBits); 1732 OpUsefulBits 
= 1; 1733 1734 if (MSB >= Imm) { 1735 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); 1736 --OpUsefulBits; 1737 UsefulBits &= ~OpUsefulBits; 1738 getUsefulBits(Op, UsefulBits, Depth + 1); 1739 } else { 1740 OpUsefulBits = OpUsefulBits.shl(MSB + 1); 1741 --OpUsefulBits; 1742 UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); 1743 getUsefulBits(Op, UsefulBits, Depth + 1); 1744 } 1745 } 1746 1747 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 1748 SDValue Orig, unsigned Depth) { 1749 1750 // Users of this node should have already been instruction selected 1751 // FIXME: Can we turn that into an assert? 1752 if (!UserNode->isMachineOpcode()) 1753 return; 1754 1755 switch (UserNode->getMachineOpcode()) { 1756 default: 1757 return; 1758 case AArch64::ANDSWri: 1759 case AArch64::ANDSXri: 1760 case AArch64::ANDWri: 1761 case AArch64::ANDXri: 1762 // We increment Depth only when we call the getUsefulBits 1763 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 1764 Depth); 1765 case AArch64::UBFMWri: 1766 case AArch64::UBFMXri: 1767 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 1768 1769 case AArch64::ORRWrs: 1770 case AArch64::ORRXrs: 1771 if (UserNode->getOperand(1) != Orig) 1772 return; 1773 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 1774 Depth); 1775 case AArch64::BFMWri: 1776 case AArch64::BFMXri: 1777 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 1778 } 1779 } 1780 1781 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 1782 if (Depth >= 6) 1783 return; 1784 // Initialize UsefulBits 1785 if (!Depth) { 1786 unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); 1787 // At the beginning, assume every produced bits is useful 1788 UsefulBits = APInt(Bitwidth, 0); 1789 UsefulBits.flipAllBits(); 1790 } 1791 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 1792 1793 for (SDNode *Node : Op.getNode()->uses()) { 1794 // A use cannot produce useful bits 1795 APInt UsefulBitsForUse = APInt(UsefulBits); 1796 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 1797 UsersUsefulBits |= UsefulBitsForUse; 1798 } 1799 // UsefulBits contains the produced bits that are meaningful for the 1800 // current definition, thus a user cannot make a bit meaningful at 1801 // this point 1802 UsefulBits &= UsersUsefulBits; 1803 } 1804 1805 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 1806 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 1807 /// 0, return Op unchanged. 1808 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 1809 if (ShlAmount == 0) 1810 return Op; 1811 1812 EVT VT = Op.getValueType(); 1813 unsigned BitWidth = VT.getSizeInBits(); 1814 unsigned UBFMOpc = BitWidth == 32 ? 
AArch64::UBFMWri : AArch64::UBFMXri; 1815 1816 SDNode *ShiftNode; 1817 if (ShlAmount > 0) { 1818 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 1819 ShiftNode = CurDAG->getMachineNode( 1820 UBFMOpc, SDLoc(Op), VT, Op, 1821 CurDAG->getTargetConstant(BitWidth - ShlAmount, VT), 1822 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT)); 1823 } else { 1824 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 1825 assert(ShlAmount < 0 && "expected right shift"); 1826 int ShrAmount = -ShlAmount; 1827 ShiftNode = CurDAG->getMachineNode( 1828 UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT), 1829 CurDAG->getTargetConstant(BitWidth - 1, VT)); 1830 } 1831 1832 return SDValue(ShiftNode, 0); 1833 } 1834 1835 /// Does this tree qualify as an attempt to move a bitfield into position, 1836 /// essentially "(and (shl VAL, N), Mask)"? 1837 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 1838 SDValue &Src, int &ShiftAmount, 1839 int &MaskWidth) { 1840 EVT VT = Op.getValueType(); 1841 unsigned BitWidth = VT.getSizeInBits(); 1842 (void)BitWidth; 1843 assert(BitWidth == 32 || BitWidth == 64); 1844 1845 APInt KnownZero, KnownOne; 1846 CurDAG->computeKnownBits(Op, KnownZero, KnownOne); 1847 1848 // Non-zero in the sense that they're not provably zero, which is the key 1849 // point if we want to use this value. 1850 uint64_t NonZeroBits = (~KnownZero).getZExtValue(); 1851 1852 // Discard a constant AND mask if present. It's safe because the node will 1853 // already have been factored into the computeKnownBits calculation above. 1854 uint64_t AndImm; 1855 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 1856 assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0); 1857 Op = Op.getOperand(0); 1858 } 1859 1860 uint64_t ShlImm; 1861 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 1862 return false; 1863 Op = Op.getOperand(0); 1864 1865 if (!isShiftedMask_64(NonZeroBits)) 1866 return false; 1867 1868 ShiftAmount = countTrailingZeros(NonZeroBits); 1869 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 1870 1871 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 1872 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 1873 // amount. 1874 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 1875 1876 return true; 1877 } 1878 1879 // Given an OR operation, check if we have the following pattern: 1880 // ubfm c, b, imm, imm2 (or something that does the same job, see 1881 // isBitfieldExtractOp) 1882 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and 1883 // countTrailingZeros(mask2) == imm2 - imm + 1 1884 // f = d | c 1885 // If so, the given reference arguments will be updated so that one can replace 1886 // the OR instruction with: 1887 // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 1888 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, 1889 SDValue &Src, unsigned &ImmR, 1890 unsigned &ImmS, SelectionDAG *CurDAG) { 1891 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 1892 1893 // Set Opc 1894 EVT VT = N->getValueType(0); 1895 if (VT == MVT::i32) 1896 Opc = AArch64::BFMWri; 1897 else if (VT == MVT::i64) 1898 Opc = AArch64::BFMXri; 1899 else 1900 return false; 1901 1902 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 1903 // have the expected shape. Try to undo that.
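// getUsefulBits below computes which bits of this OR are actually observed
// by its users. As a minimal sketch: if the only user were
//   and w1, w0, #0xffff
// then only the low 16 bits would be useful, NumberOfIgnoredHighBits would be
// 16, and isBitfieldDstMask could ignore the discarded upper half when
// checking the destination mask.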
1904 APInt UsefulBits; 1905 getUsefulBits(SDValue(N, 0), UsefulBits); 1906 1907 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 1908 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 1909 1910 // OR is commutative, so check both orderings (does llvm provide a 1911 // way to do that directly, e.g., via a code matcher?) 1912 SDValue OrOpd1Val = N->getOperand(1); 1913 SDNode *OrOpd0 = N->getOperand(0).getNode(); 1914 SDNode *OrOpd1 = N->getOperand(1).getNode(); 1915 for (int i = 0; i < 2; 1916 ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { 1917 unsigned BFXOpc; 1918 int DstLSB, Width; 1919 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 1920 NumberOfIgnoredLowBits, true)) { 1921 // Check that the returned opcode is compatible with the pattern, 1922 // i.e., same type and zero extended (U and not S) 1923 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 1924 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 1925 continue; 1926 1927 // Compute the width of the bitfield insertion 1928 DstLSB = 0; 1929 Width = ImmS - ImmR + 1; 1930 // FIXME: This constraint catches only bitfield insertion; we may 1931 // want to widen the pattern if we want to handle the general bitfield 1932 // move case. 1933 if (Width <= 0) 1934 continue; 1935 1936 // If the mask on the insertee is correct, we have a BFXIL operation. We 1937 // can share the ImmR and ImmS values from the already-computed UBFM. 1938 } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, 1939 DstLSB, Width)) { 1940 ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 1941 ImmS = Width - 1; 1942 } else 1943 continue; 1944 1945 // Check the second part of the pattern 1946 EVT VT = OrOpd1->getValueType(0); 1947 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 1948 1949 // Compute the known zero bits for the other OR operand (the candidate 1950 // insertion destination). This allows us to catch more general cases than 1951 // just looking for an AND with an immediate: simplify-demanded-bits may 1952 // have removed the AND instruction because it proved it was useless.
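// For instance, in (or (shl x, 16), (srl y, 16)) on i32 no AND node survives,
// but computeKnownBits still proves that the top 16 bits of (srl y, 16) are
// zero, so the insertion slot is known to be free.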
1953 APInt KnownZero, KnownOne; 1954 CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); 1955 1956 // Check if there is enough room for the second operand to appear 1957 // in the first one 1958 APInt BitsToBeInserted = 1959 APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); 1960 1961 if ((BitsToBeInserted & ~KnownZero) != 0) 1962 continue; 1963 1964 // Set the first operand 1965 uint64_t Imm; 1966 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 1967 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 1968 // In that case, we can eliminate the AND 1969 Dst = OrOpd1->getOperand(0); 1970 else 1971 // Maybe the AND has been removed by simplify-demanded-bits 1972 // or is useful because it discards more bits 1973 Dst = OrOpd1Val; 1974 1975 // both parts match 1976 return true; 1977 } 1978 1979 return false; 1980 } 1981 1982 SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { 1983 if (N->getOpcode() != ISD::OR) 1984 return nullptr; 1985 1986 unsigned Opc; 1987 unsigned LSB, MSB; 1988 SDValue Opd0, Opd1; 1989 1990 if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) 1991 return nullptr; 1992 1993 EVT VT = N->getValueType(0); 1994 SDValue Ops[] = { Opd0, 1995 Opd1, 1996 CurDAG->getTargetConstant(LSB, VT), 1997 CurDAG->getTargetConstant(MSB, VT) }; 1998 return CurDAG->SelectNodeTo(N, Opc, VT, Ops); 1999 } 2000 2001 SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { 2002 EVT VT = N->getValueType(0); 2003 unsigned Variant; 2004 unsigned Opc; 2005 unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; 2006 2007 if (VT == MVT::f32) { 2008 Variant = 0; 2009 } else if (VT == MVT::f64) { 2010 Variant = 1; 2011 } else 2012 return nullptr; // Unrecognized argument type. Fall back on default codegen. 2013 2014 // Pick the FRINTX variant needed to set the flags. 2015 unsigned FRINTXOpc = FRINTXOpcs[Variant]; 2016 2017 switch (N->getOpcode()) { 2018 default: 2019 return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. 2020 case ISD::FCEIL: { 2021 unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; 2022 Opc = FRINTPOpcs[Variant]; 2023 break; 2024 } 2025 case ISD::FFLOOR: { 2026 unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; 2027 Opc = FRINTMOpcs[Variant]; 2028 break; 2029 } 2030 case ISD::FTRUNC: { 2031 unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; 2032 Opc = FRINTZOpcs[Variant]; 2033 break; 2034 } 2035 case ISD::FROUND: { 2036 unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; 2037 Opc = FRINTAOpcs[Variant]; 2038 break; 2039 } 2040 } 2041 2042 SDLoc dl(N); 2043 SDValue In = N->getOperand(0); 2044 SmallVector<SDValue, 2> Ops; 2045 Ops.push_back(In); 2046 2047 if (!TM.Options.UnsafeFPMath) { 2048 SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); 2049 Ops.push_back(SDValue(FRINTX, 1)); 2050 } 2051 2052 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 2053 } 2054 2055 bool 2056 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2057 unsigned RegWidth) { 2058 APFloat FVal(0.0); 2059 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2060 FVal = CN->getValueAPF(); 2061 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2062 // Some otherwise illegal constants are allowed in this case. 
2063 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2064 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2065 return false; 2066 2067 ConstantPoolSDNode *CN = 2068 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2069 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2070 } else 2071 return false; 2072 2073 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2074 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2075 // x-register. 2076 // 2077 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2078 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2079 // integers. 2080 bool IsExact; 2081 2082 // fbits is between 1 and 64 in the worst-case, which means the fmul 2083 // could have 2^64 as an actual operand. Need 65 bits of precision. 2084 APSInt IntVal(65, true); 2085 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2086 2087 // N.b. isPowerOf2 also checks for > 0. 2088 if (!IsExact || !IntVal.isPowerOf2()) return false; 2089 unsigned FBits = IntVal.logBase2(); 2090 2091 // Checks above should have guaranteed that we haven't lost information in 2092 // finding FBits, but it must still be in range. 2093 if (FBits == 0 || FBits > RegWidth) return false; 2094 2095 FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32); 2096 return true; 2097 } 2098 2099 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { 2100 // Dump information about the Node being selected 2101 DEBUG(errs() << "Selecting: "); 2102 DEBUG(Node->dump(CurDAG)); 2103 DEBUG(errs() << "\n"); 2104 2105 // If we have a custom node, we already have selected! 2106 if (Node->isMachineOpcode()) { 2107 DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 2108 Node->setNodeId(-1); 2109 return nullptr; 2110 } 2111 2112 // Few custom selection stuff. 2113 SDNode *ResNode = nullptr; 2114 EVT VT = Node->getValueType(0); 2115 2116 switch (Node->getOpcode()) { 2117 default: 2118 break; 2119 2120 case ISD::ADD: 2121 if (SDNode *I = SelectMLAV64LaneV128(Node)) 2122 return I; 2123 break; 2124 2125 case ISD::LOAD: { 2126 // Try to select as an indexed load. Fall through to normal processing 2127 // if we can't. 2128 bool Done = false; 2129 SDNode *I = SelectIndexedLoad(Node, Done); 2130 if (Done) 2131 return I; 2132 break; 2133 } 2134 2135 case ISD::SRL: 2136 case ISD::AND: 2137 case ISD::SRA: 2138 if (SDNode *I = SelectBitfieldExtractOp(Node)) 2139 return I; 2140 break; 2141 2142 case ISD::OR: 2143 if (SDNode *I = SelectBitfieldInsertOp(Node)) 2144 return I; 2145 break; 2146 2147 case ISD::EXTRACT_VECTOR_ELT: { 2148 // Extracting lane zero is a special case where we can just use a plain 2149 // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for 2150 // the rest of the compiler, especially the register allocator and copyi 2151 // propagation, to reason about, so is preferred when it's possible to 2152 // use it. 2153 ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); 2154 // Bail and use the default Select() for non-zero lanes. 2155 if (LaneNode->getZExtValue() != 0) 2156 break; 2157 // If the element type is not the same as the result type, likewise 2158 // bail and use the default Select(), as there's more to do than just 2159 // a cross-class COPY. This catches extracts of i8 and i16 elements 2160 // since they will need an explicit zext. 
2161 if (VT != Node->getOperand(0).getValueType().getVectorElementType()) 2162 break; 2163 unsigned SubReg; 2164 switch (Node->getOperand(0) 2165 .getValueType() 2166 .getVectorElementType() 2167 .getSizeInBits()) { 2168 default: 2169 llvm_unreachable("Unexpected vector element type!"); 2170 case 64: 2171 SubReg = AArch64::dsub; 2172 break; 2173 case 32: 2174 SubReg = AArch64::ssub; 2175 break; 2176 case 16: 2177 SubReg = AArch64::hsub; 2178 break; 2179 case 8: 2180 llvm_unreachable("unexpected zext-requiring extract element!"); 2181 } 2182 SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, 2183 Node->getOperand(0)); 2184 DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); 2185 DEBUG(Extract->dumpr(CurDAG)); 2186 DEBUG(dbgs() << "\n"); 2187 return Extract.getNode(); 2188 } 2189 case ISD::Constant: { 2190 // Materialize zero constants as copies from WZR/XZR. This allows 2191 // the coalescer to propagate these into other instructions. 2192 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 2193 if (ConstNode->isNullValue()) { 2194 if (VT == MVT::i32) 2195 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), 2196 AArch64::WZR, MVT::i32).getNode(); 2197 else if (VT == MVT::i64) 2198 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), 2199 AArch64::XZR, MVT::i64).getNode(); 2200 } 2201 break; 2202 } 2203 2204 case ISD::FrameIndex: { 2205 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 2206 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 2207 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 2208 const TargetLowering *TLI = getTargetLowering(); 2209 SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 2210 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), 2211 CurDAG->getTargetConstant(Shifter, MVT::i32) }; 2212 return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 2213 } 2214 case ISD::INTRINSIC_W_CHAIN: { 2215 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2216 switch (IntNo) { 2217 default: 2218 break; 2219 case Intrinsic::aarch64_ldaxp: 2220 case Intrinsic::aarch64_ldxp: { 2221 unsigned Op = 2222 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 2223 SDValue MemAddr = Node->getOperand(2); 2224 SDLoc DL(Node); 2225 SDValue Chain = Node->getOperand(0); 2226 2227 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 2228 MVT::Other, MemAddr, Chain); 2229 2230 // Transfer memoperands. 2231 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2232 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 2233 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 2234 return Ld; 2235 } 2236 case Intrinsic::aarch64_stlxp: 2237 case Intrinsic::aarch64_stxp: { 2238 unsigned Op = 2239 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 2240 SDLoc DL(Node); 2241 SDValue Chain = Node->getOperand(0); 2242 SDValue ValLo = Node->getOperand(2); 2243 SDValue ValHi = Node->getOperand(3); 2244 SDValue MemAddr = Node->getOperand(4); 2245 2246 // Place arguments in the right order. 2247 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 2248 2249 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 2250 // Transfer memoperands. 
2251 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2252 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 2253 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 2254 2255 return St; 2256 } 2257 case Intrinsic::aarch64_neon_ld1x2: 2258 if (VT == MVT::v8i8) 2259 return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 2260 else if (VT == MVT::v16i8) 2261 return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 2262 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2263 return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 2264 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2265 return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 2266 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2267 return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 2268 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2269 return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 2270 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2271 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 2272 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2273 return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 2274 break; 2275 case Intrinsic::aarch64_neon_ld1x3: 2276 if (VT == MVT::v8i8) 2277 return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 2278 else if (VT == MVT::v16i8) 2279 return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 2280 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2281 return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 2282 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2283 return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 2284 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2285 return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 2286 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2287 return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 2288 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2289 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 2290 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2291 return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 2292 break; 2293 case Intrinsic::aarch64_neon_ld1x4: 2294 if (VT == MVT::v8i8) 2295 return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 2296 else if (VT == MVT::v16i8) 2297 return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 2298 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2299 return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 2300 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2301 return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 2302 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2303 return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 2304 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2305 return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 2306 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2307 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 2308 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2309 return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 2310 break; 2311 case Intrinsic::aarch64_neon_ld2: 2312 if (VT == MVT::v8i8) 2313 return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 2314 else if (VT == MVT::v16i8) 2315 return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 2316 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2317 return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 2318 else if (VT == 
MVT::v8i16 || VT == MVT::v8f16) 2319 return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 2320 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2321 return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 2322 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2323 return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 2324 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2325 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 2326 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2327 return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 2328 break; 2329 case Intrinsic::aarch64_neon_ld3: 2330 if (VT == MVT::v8i8) 2331 return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 2332 else if (VT == MVT::v16i8) 2333 return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 2334 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2335 return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 2336 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2337 return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 2338 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2339 return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 2340 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2341 return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 2342 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2343 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 2344 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2345 return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 2346 break; 2347 case Intrinsic::aarch64_neon_ld4: 2348 if (VT == MVT::v8i8) 2349 return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 2350 else if (VT == MVT::v16i8) 2351 return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 2352 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2353 return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 2354 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2355 return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 2356 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2357 return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 2358 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2359 return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 2360 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2361 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 2362 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2363 return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 2364 break; 2365 case Intrinsic::aarch64_neon_ld2r: 2366 if (VT == MVT::v8i8) 2367 return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 2368 else if (VT == MVT::v16i8) 2369 return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 2370 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2371 return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 2372 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2373 return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 2374 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2375 return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 2376 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2377 return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 2378 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2379 return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 2380 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2381 return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 2382 break; 2383 case 
Intrinsic::aarch64_neon_ld3r: 2384 if (VT == MVT::v8i8) 2385 return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 2386 else if (VT == MVT::v16i8) 2387 return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 2388 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2389 return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 2390 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2391 return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 2392 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2393 return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 2394 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2395 return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 2396 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2397 return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 2398 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2399 return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 2400 break; 2401 case Intrinsic::aarch64_neon_ld4r: 2402 if (VT == MVT::v8i8) 2403 return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 2404 else if (VT == MVT::v16i8) 2405 return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 2406 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2407 return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 2408 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2409 return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 2410 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2411 return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 2412 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2413 return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 2414 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2415 return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 2416 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2417 return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 2418 break; 2419 case Intrinsic::aarch64_neon_ld2lane: 2420 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2421 return SelectLoadLane(Node, 2, AArch64::LD2i8); 2422 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 2423 VT == MVT::v8f16) 2424 return SelectLoadLane(Node, 2, AArch64::LD2i16); 2425 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2426 VT == MVT::v2f32) 2427 return SelectLoadLane(Node, 2, AArch64::LD2i32); 2428 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2429 VT == MVT::v1f64) 2430 return SelectLoadLane(Node, 2, AArch64::LD2i64); 2431 break; 2432 case Intrinsic::aarch64_neon_ld3lane: 2433 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2434 return SelectLoadLane(Node, 3, AArch64::LD3i8); 2435 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 2436 VT == MVT::v8f16) 2437 return SelectLoadLane(Node, 3, AArch64::LD3i16); 2438 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2439 VT == MVT::v2f32) 2440 return SelectLoadLane(Node, 3, AArch64::LD3i32); 2441 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2442 VT == MVT::v1f64) 2443 return SelectLoadLane(Node, 3, AArch64::LD3i64); 2444 break; 2445 case Intrinsic::aarch64_neon_ld4lane: 2446 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2447 return SelectLoadLane(Node, 4, AArch64::LD4i8); 2448 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 2449 VT == MVT::v8f16) 2450 return SelectLoadLane(Node, 4, AArch64::LD4i16); 2451 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2452 VT == MVT::v2f32) 2453 return SelectLoadLane(Node, 4, 
AArch64::LD4i32); 2454 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2455 VT == MVT::v1f64) 2456 return SelectLoadLane(Node, 4, AArch64::LD4i64); 2457 break; 2458 } 2459 } break; 2460 case ISD::INTRINSIC_WO_CHAIN: { 2461 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 2462 switch (IntNo) { 2463 default: 2464 break; 2465 case Intrinsic::aarch64_neon_tbl2: 2466 return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two 2467 : AArch64::TBLv16i8Two, 2468 false); 2469 case Intrinsic::aarch64_neon_tbl3: 2470 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 2471 : AArch64::TBLv16i8Three, 2472 false); 2473 case Intrinsic::aarch64_neon_tbl4: 2474 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 2475 : AArch64::TBLv16i8Four, 2476 false); 2477 case Intrinsic::aarch64_neon_tbx2: 2478 return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two 2479 : AArch64::TBXv16i8Two, 2480 true); 2481 case Intrinsic::aarch64_neon_tbx3: 2482 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 2483 : AArch64::TBXv16i8Three, 2484 true); 2485 case Intrinsic::aarch64_neon_tbx4: 2486 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 2487 : AArch64::TBXv16i8Four, 2488 true); 2489 case Intrinsic::aarch64_neon_smull: 2490 case Intrinsic::aarch64_neon_umull: 2491 if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) 2492 return N; 2493 break; 2494 } 2495 break; 2496 } 2497 case ISD::INTRINSIC_VOID: { 2498 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2499 if (Node->getNumOperands() >= 3) 2500 VT = Node->getOperand(2)->getValueType(0); 2501 switch (IntNo) { 2502 default: 2503 break; 2504 case Intrinsic::aarch64_neon_st1x2: { 2505 if (VT == MVT::v8i8) 2506 return SelectStore(Node, 2, AArch64::ST1Twov8b); 2507 else if (VT == MVT::v16i8) 2508 return SelectStore(Node, 2, AArch64::ST1Twov16b); 2509 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2510 return SelectStore(Node, 2, AArch64::ST1Twov4h); 2511 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2512 return SelectStore(Node, 2, AArch64::ST1Twov8h); 2513 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2514 return SelectStore(Node, 2, AArch64::ST1Twov2s); 2515 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2516 return SelectStore(Node, 2, AArch64::ST1Twov4s); 2517 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2518 return SelectStore(Node, 2, AArch64::ST1Twov2d); 2519 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2520 return SelectStore(Node, 2, AArch64::ST1Twov1d); 2521 break; 2522 } 2523 case Intrinsic::aarch64_neon_st1x3: { 2524 if (VT == MVT::v8i8) 2525 return SelectStore(Node, 3, AArch64::ST1Threev8b); 2526 else if (VT == MVT::v16i8) 2527 return SelectStore(Node, 3, AArch64::ST1Threev16b); 2528 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2529 return SelectStore(Node, 3, AArch64::ST1Threev4h); 2530 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2531 return SelectStore(Node, 3, AArch64::ST1Threev8h); 2532 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2533 return SelectStore(Node, 3, AArch64::ST1Threev2s); 2534 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2535 return SelectStore(Node, 3, AArch64::ST1Threev4s); 2536 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2537 return SelectStore(Node, 3, AArch64::ST1Threev2d); 2538 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2539 return SelectStore(Node, 3, AArch64::ST1Threev1d); 2540 break; 2541 } 2542 case Intrinsic::aarch64_neon_st1x4: { 2543 if (VT == MVT::v8i8) 2544 
return SelectStore(Node, 4, AArch64::ST1Fourv8b); 2545 else if (VT == MVT::v16i8) 2546 return SelectStore(Node, 4, AArch64::ST1Fourv16b); 2547 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2548 return SelectStore(Node, 4, AArch64::ST1Fourv4h); 2549 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2550 return SelectStore(Node, 4, AArch64::ST1Fourv8h); 2551 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2552 return SelectStore(Node, 4, AArch64::ST1Fourv2s); 2553 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2554 return SelectStore(Node, 4, AArch64::ST1Fourv4s); 2555 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2556 return SelectStore(Node, 4, AArch64::ST1Fourv2d); 2557 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2558 return SelectStore(Node, 4, AArch64::ST1Fourv1d); 2559 break; 2560 } 2561 case Intrinsic::aarch64_neon_st2: { 2562 if (VT == MVT::v8i8) 2563 return SelectStore(Node, 2, AArch64::ST2Twov8b); 2564 else if (VT == MVT::v16i8) 2565 return SelectStore(Node, 2, AArch64::ST2Twov16b); 2566 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2567 return SelectStore(Node, 2, AArch64::ST2Twov4h); 2568 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2569 return SelectStore(Node, 2, AArch64::ST2Twov8h); 2570 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2571 return SelectStore(Node, 2, AArch64::ST2Twov2s); 2572 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2573 return SelectStore(Node, 2, AArch64::ST2Twov4s); 2574 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2575 return SelectStore(Node, 2, AArch64::ST2Twov2d); 2576 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2577 return SelectStore(Node, 2, AArch64::ST1Twov1d); 2578 break; 2579 } 2580 case Intrinsic::aarch64_neon_st3: { 2581 if (VT == MVT::v8i8) 2582 return SelectStore(Node, 3, AArch64::ST3Threev8b); 2583 else if (VT == MVT::v16i8) 2584 return SelectStore(Node, 3, AArch64::ST3Threev16b); 2585 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2586 return SelectStore(Node, 3, AArch64::ST3Threev4h); 2587 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2588 return SelectStore(Node, 3, AArch64::ST3Threev8h); 2589 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2590 return SelectStore(Node, 3, AArch64::ST3Threev2s); 2591 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2592 return SelectStore(Node, 3, AArch64::ST3Threev4s); 2593 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2594 return SelectStore(Node, 3, AArch64::ST3Threev2d); 2595 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2596 return SelectStore(Node, 3, AArch64::ST1Threev1d); 2597 break; 2598 } 2599 case Intrinsic::aarch64_neon_st4: { 2600 if (VT == MVT::v8i8) 2601 return SelectStore(Node, 4, AArch64::ST4Fourv8b); 2602 else if (VT == MVT::v16i8) 2603 return SelectStore(Node, 4, AArch64::ST4Fourv16b); 2604 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2605 return SelectStore(Node, 4, AArch64::ST4Fourv4h); 2606 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2607 return SelectStore(Node, 4, AArch64::ST4Fourv8h); 2608 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2609 return SelectStore(Node, 4, AArch64::ST4Fourv2s); 2610 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2611 return SelectStore(Node, 4, AArch64::ST4Fourv4s); 2612 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2613 return SelectStore(Node, 4, AArch64::ST4Fourv2d); 2614 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2615 return SelectStore(Node, 4, AArch64::ST1Fourv1d); 2616 break; 2617 } 2618 case Intrinsic::aarch64_neon_st2lane: { 2619 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2620 return SelectStoreLane(Node, 2, 
AArch64::ST2i8); 2621 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 2622 VT == MVT::v8f16) 2623 return SelectStoreLane(Node, 2, AArch64::ST2i16); 2624 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2625 VT == MVT::v2f32) 2626 return SelectStoreLane(Node, 2, AArch64::ST2i32); 2627 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2628 VT == MVT::v1f64) 2629 return SelectStoreLane(Node, 2, AArch64::ST2i64); 2630 break; 2631 } 2632 case Intrinsic::aarch64_neon_st3lane: { 2633 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2634 return SelectStoreLane(Node, 3, AArch64::ST3i8); 2635 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 2636 VT == MVT::v8f16) 2637 return SelectStoreLane(Node, 3, AArch64::ST3i16); 2638 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2639 VT == MVT::v2f32) 2640 return SelectStoreLane(Node, 3, AArch64::ST3i32); 2641 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2642 VT == MVT::v1f64) 2643 return SelectStoreLane(Node, 3, AArch64::ST3i64); 2644 break; 2645 } 2646 case Intrinsic::aarch64_neon_st4lane: { 2647 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2648 return SelectStoreLane(Node, 4, AArch64::ST4i8); 2649 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 2650 VT == MVT::v8f16) 2651 return SelectStoreLane(Node, 4, AArch64::ST4i16); 2652 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2653 VT == MVT::v2f32) 2654 return SelectStoreLane(Node, 4, AArch64::ST4i32); 2655 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2656 VT == MVT::v1f64) 2657 return SelectStoreLane(Node, 4, AArch64::ST4i64); 2658 break; 2659 } 2660 } 2661 } 2662 case AArch64ISD::LD2post: { 2663 if (VT == MVT::v8i8) 2664 return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 2665 else if (VT == MVT::v16i8) 2666 return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 2667 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2668 return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 2669 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2670 return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 2671 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2672 return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 2673 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2674 return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 2675 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2676 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 2677 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2678 return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 2679 break; 2680 } 2681 case AArch64ISD::LD3post: { 2682 if (VT == MVT::v8i8) 2683 return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 2684 else if (VT == MVT::v16i8) 2685 return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 2686 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2687 return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 2688 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2689 return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 2690 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2691 return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 2692 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2693 return SelectPostLoad(Node, 3, 
AArch64::LD3Threev4s_POST, AArch64::qsub0); 2694 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2695 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 2696 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2697 return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 2698 break; 2699 } 2700 case AArch64ISD::LD4post: { 2701 if (VT == MVT::v8i8) 2702 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 2703 else if (VT == MVT::v16i8) 2704 return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 2705 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2706 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 2707 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2708 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 2709 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2710 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 2711 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2712 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 2713 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2714 return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 2715 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2716 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 2717 break; 2718 } 2719 case AArch64ISD::LD1x2post: { 2720 if (VT == MVT::v8i8) 2721 return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 2722 else if (VT == MVT::v16i8) 2723 return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 2724 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2725 return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 2726 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2727 return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 2728 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2729 return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 2730 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2731 return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 2732 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2733 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 2734 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2735 return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 2736 break; 2737 } 2738 case AArch64ISD::LD1x3post: { 2739 if (VT == MVT::v8i8) 2740 return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 2741 else if (VT == MVT::v16i8) 2742 return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 2743 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 2744 return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 2745 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 2746 return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 2747 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2748 return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 2749 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2750 return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 2751 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2752 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 2753 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2754 return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 2755 break; 2756 } 2757 case AArch64ISD::LD1x4post: { 2758 if (VT == 
MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
    break;
  }
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    break;
  }

  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FTRUNC:
  case ISD::FROUND:
    if (SDNode *I = SelectLIBM(Node))
      return I;
    break;
  }

  // Select the default instruction
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == nullptr || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}
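
// A minimal usage sketch (not part of the original file) of how the factory
// above is typically wired into the backend's pass pipeline. The class and
// hook names used here (AArch64PassConfig, addInstSelector,
// getAArch64TargetMachine) are assumptions based on the usual
// TargetPassConfig pattern; the authoritative wiring lives in
// AArch64TargetMachine.cpp. Kept as a comment so this file still compiles.
//
//   bool AArch64PassConfig::addInstSelector() {
//     // Install the SelectionDAG-based instruction selector created above;
//     // it runs once per function during code generation.
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }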