//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  cl::desc("Check fp vmla / vmls hazard at isel time"),
  cl::init(true));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  const char *getPassName() const override {
    return "ARM Instruction Selection";
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  SDNode *Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
                                      SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
  }

  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
  }

  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc) {
    SelectAddrMode2Worker(N, Base, Offset, Opc);
    // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    // This always matches one way or another.
    return true;
  }

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode5(SDValue N, SDValue &Base,
                       SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
  /// ARM.
  SDNode *SelectARMIndexedLoad(SDNode *N);
  SDNode *SelectT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
                          bool isUpdating, unsigned NumVecs,
                          const uint16_t *DOpcodes, const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers. (Q registers are not supported.)
  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                       const uint16_t *Opcodes);

  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
  /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
  /// generated to force the table registers to be consecutive.
  SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);

  /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
  SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  SDNode *SelectABSOp(SDNode *N);

  SDNode *SelectReadRegister(SDNode *N);
  SDNode *SelectWriteRegister(SDNode *N);

  SDNode *SelectInlineAsm(SDNode *N);

  SDNode *SelectConcatVector(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// \brief Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. E.g., on
      // Swift, left shifter operands of 1 or 2 are free but others are not.
      // For example:
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
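// A worked example of the transformation above (illustrative, not from the
// original source): with c1 = 14 and c2 = 1020 (0b1111111100, tz = 2),
//   (add X1, (and (srl X2, 14), 1020))
// becomes
//   (add X1, (shl (and (srl X2, 16), 255), 2))
// which selects to a UBFX of 8 bits starting at bit 16, followed by an ADD
// with an LSL #2 shifter operand, as in the asm comparison shown above.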
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!CheckVMLxHazard)
    return true;

  if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
      !Subtarget->isCortexA9() && !Subtarget->isSwift())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd
    // This adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt(*MF)) return 2;                    // MOVW + MOVT
  return 3;                                                 // Literal pool load
}
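// A few illustrative data points for the cost model above (our examples, not
// from the original source): in ARM mode, 255 is a single MOV (cost 1),
// 0xFFFFFF00 is a single MVN of 0xFF (cost 1), and 0x00FF00FF splits into the
// two rotated immediates 0x00FF0000 + 0x000000FF (cost 2). A value such as
// 0x12345678 falls through to MOVW + MOVT when available, otherwise to a
// literal pool load (cost 3).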
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  CurDAG->ReplaceAllUsesWith(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
                                               N.getOperand(0), NewMulConst)
                                   .getNode()),
                        0);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
                                                          PowerOfTwo),
                                      SDLoc(N), MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to the immediate form; it is handled by
  // SelectImmShifterOperand above, so bail out here.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
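// Illustrative note (ours, not from the original source): given
// (add r0, (shl r1, 2)), SelectImmShifterOperand yields BaseReg = r1 and an
// Opc encoding "lsl #2", so the add selects to "add rD, r0, r1, lsl #2".
// The multiply path rewrites e.g. (mul r1, 510) as (mul r1, 255) plus
// "lsl #1" on a target where 510 costs two instructions to materialize but
// 255 costs one.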
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
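// Illustrative note (ours, not from the original source): the MUL special
// case below exploits the shifter operand, e.g. an address computed as x * 5
// can be selected as base = x, offset = x with "lsl #2":
//   ldr r0, [r1, r1, lsl #2]   ; address = r1 + (r1 << 2) = 5 * r1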
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
             dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}


//-----

AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      SDLoc(N), MVT::i32);
      return AM2_BASE;
    }
  }

  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
             dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return AM2_SHOP;
}
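// Illustrative note (ours, not from the original source): AM2 covers the
// classic ARM word/byte load/store forms, e.g.
//   ldr r0, [r1, #-40]          ; AM2_BASE, +/- imm12
//   ldr r0, [r1, r2, lsl #3]    ; AM2_SHOP, +/- reg with optional shift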
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     SDLoc(N), MVT::i32);
  return true;
}
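// Illustrative note (ours, not from the original source): AM5 is the VFP
// load/store form, whose immediate is a word count. The /*Scale=*/4 above
// therefore accepts byte offsets that are multiples of 4 up to +/-1020, e.g.
//   vldr d0, [r1, #1020]   ; encoded as imm8 = 255 words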
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                            SDValue &Base, SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
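// Illustrative note (ours, not from the original source): Thumb-1 loads and
// stores take a 5-bit immediate scaled by the access size, so the reachable
// byte offsets are 0-31 (bytes), 0-62 (halfwords), and 0-124 (words), e.g.
//   ldr r0, [r1, #124]   ; imm5 = 31, scale = 4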
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo *MFI = MF->getFrameInfo();
    if (MFI->getObjectAlignment(FI) < 4)
      MFI->setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo *MFI = MF->getFrameInfo();
        if (MFI->getObjectAlignment(FI) < 4)
          MFI->setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//
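// Illustrative note (ours, not from the original source): the Thumb-2
// selectors below partition reg+imm addresses so each form has exactly one
// matcher: non-negative offsets up to 4095 go to the imm12 form (t2LDRi12),
// small negative offsets down to -255 go to the imm8 form (t2LDRi8), and the
// rest fall back to reg + (reg << 0..3) (the "soreg" form).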
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
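// Illustrative note (ours, not from the original source): the imm8 offset
// form above feeds the Thumb-2 pre/post-indexed loads and stores, e.g.
//   ldr r0, [r1], #4    ; post-increment, offset +4
//   ldr r0, [r1, #-8]!  ; pre-decrement, offset -8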
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
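// Illustrative note (ours, not from the original source): the indexed load
// selectors below turn pre/post-indexed DAG loads into write-back
// instructions, e.g. an i32 post-increment load by 4 becomes LDR_POST_IMM:
//   ldr r0, [r1], #4    ; loads from r1, then r1 += 4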
SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return nullptr;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                    MVT::i32, MVT::Other, Ops);
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                    MVT::i32, MVT::Other, Ops);
    }
  }

  return nullptr;
}

SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return nullptr;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return nullptr;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                  MVT::Other, Ops);
  }

  return nullptr;
}
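// Illustrative note (ours, not from the original source): the create*Node
// helpers below all build a REG_SEQUENCE machine node. Its operand list is a
// register class ID followed by (value, subregister-index) pairs; the
// register allocator then assigns the values to adjacent registers, which
// NEON instructions such as VLD/VST and VTBL require.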
/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// \brief Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// \brief Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// \brief Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// \brief Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                    V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
1651 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1652 SDValue V2, SDValue V3) { 1653 SDLoc dl(V0.getNode()); 1654 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1655 MVT::i32); 1656 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1657 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1658 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1659 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1660 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1661 V2, SubReg2, V3, SubReg3 }; 1662 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1663 } 1664 1665 /// \brief Form 4 consecutive Q registers. 1666 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1667 SDValue V2, SDValue V3) { 1668 SDLoc dl(V0.getNode()); 1669 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1670 MVT::i32); 1671 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1672 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1673 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1674 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1675 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1676 V2, SubReg2, V3, SubReg3 }; 1677 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1678 } 1679 1680 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1681 /// of a NEON VLD or VST instruction. The supported values depend on the 1682 /// number of registers being loaded. 1683 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl, 1684 unsigned NumVecs, bool is64BitVector) { 1685 unsigned NumRegs = NumVecs; 1686 if (!is64BitVector && NumVecs < 3) 1687 NumRegs *= 2; 1688 1689 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1690 if (Alignment >= 32 && NumRegs == 4) 1691 Alignment = 32; 1692 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1693 Alignment = 16; 1694 else if (Alignment >= 8) 1695 Alignment = 8; 1696 else 1697 Alignment = 0; 1698 1699 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1700 } 1701 1702 static bool isVLDfixed(unsigned Opc) 1703 { 1704 switch (Opc) { 1705 default: return false; 1706 case ARM::VLD1d8wb_fixed : return true; 1707 case ARM::VLD1d16wb_fixed : return true; 1708 case ARM::VLD1d64Qwb_fixed : return true; 1709 case ARM::VLD1d32wb_fixed : return true; 1710 case ARM::VLD1d64wb_fixed : return true; 1711 case ARM::VLD1d64TPseudoWB_fixed : return true; 1712 case ARM::VLD1d64QPseudoWB_fixed : return true; 1713 case ARM::VLD1q8wb_fixed : return true; 1714 case ARM::VLD1q16wb_fixed : return true; 1715 case ARM::VLD1q32wb_fixed : return true; 1716 case ARM::VLD1q64wb_fixed : return true; 1717 case ARM::VLD2d8wb_fixed : return true; 1718 case ARM::VLD2d16wb_fixed : return true; 1719 case ARM::VLD2d32wb_fixed : return true; 1720 case ARM::VLD2q8PseudoWB_fixed : return true; 1721 case ARM::VLD2q16PseudoWB_fixed : return true; 1722 case ARM::VLD2q32PseudoWB_fixed : return true; 1723 case ARM::VLD2DUPd8wb_fixed : return true; 1724 case ARM::VLD2DUPd16wb_fixed : return true; 1725 case ARM::VLD2DUPd32wb_fixed : return true; 1726 } 1727 } 1728 1729 static bool isVSTfixed(unsigned Opc) 1730 { 1731 switch (Opc) { 1732 default: return false; 1733 case ARM::VST1d8wb_fixed : return true; 1734 case ARM::VST1d16wb_fixed : return 
true; 1735 case ARM::VST1d32wb_fixed : return true; 1736 case ARM::VST1d64wb_fixed : return true; 1737 case ARM::VST1q8wb_fixed : return true; 1738 case ARM::VST1q16wb_fixed : return true; 1739 case ARM::VST1q32wb_fixed : return true; 1740 case ARM::VST1q64wb_fixed : return true; 1741 case ARM::VST1d64TPseudoWB_fixed : return true; 1742 case ARM::VST1d64QPseudoWB_fixed : return true; 1743 case ARM::VST2d8wb_fixed : return true; 1744 case ARM::VST2d16wb_fixed : return true; 1745 case ARM::VST2d32wb_fixed : return true; 1746 case ARM::VST2q8PseudoWB_fixed : return true; 1747 case ARM::VST2q16PseudoWB_fixed : return true; 1748 case ARM::VST2q32PseudoWB_fixed : return true; 1749 } 1750 } 1751 1752 // Get the register stride update opcode of a VLD/VST instruction that 1753 // is otherwise equivalent to the given fixed stride updating instruction. 1754 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1755 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1756 && "Incorrect fixed stride updating instruction."); 1757 switch (Opc) { 1758 default: break; 1759 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1760 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1761 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1762 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 1763 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 1764 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 1765 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 1766 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 1767 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 1768 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 1769 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 1770 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 1771 1772 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 1773 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 1774 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 1775 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 1776 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 1777 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 1778 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 1779 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 1780 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 1781 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 1782 1783 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 1784 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 1785 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 1786 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 1787 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 1788 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 1789 1790 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 1791 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 1792 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 1793 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 1794 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 1795 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 1796 1797 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 1798 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 1799 case ARM::VLD2DUPd32wb_fixed: 
return ARM::VLD2DUPd32wb_register; 1800 } 1801 return Opc; // If not one we handle, return it unchanged. 1802 } 1803 1804 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 1805 const uint16_t *DOpcodes, 1806 const uint16_t *QOpcodes0, 1807 const uint16_t *QOpcodes1) { 1808 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 1809 SDLoc dl(N); 1810 1811 SDValue MemAddr, Align; 1812 unsigned AddrOpIdx = isUpdating ? 1 : 2; 1813 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1814 return nullptr; 1815 1816 SDValue Chain = N->getOperand(0); 1817 EVT VT = N->getValueType(0); 1818 bool is64BitVector = VT.is64BitVector(); 1819 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1820 1821 unsigned OpcodeIndex; 1822 switch (VT.getSimpleVT().SimpleTy) { 1823 default: llvm_unreachable("unhandled vld type"); 1824 // Double-register operations: 1825 case MVT::v8i8: OpcodeIndex = 0; break; 1826 case MVT::v4i16: OpcodeIndex = 1; break; 1827 case MVT::v2f32: 1828 case MVT::v2i32: OpcodeIndex = 2; break; 1829 case MVT::v1i64: OpcodeIndex = 3; break; 1830 // Quad-register operations: 1831 case MVT::v16i8: OpcodeIndex = 0; break; 1832 case MVT::v8i16: OpcodeIndex = 1; break; 1833 case MVT::v4f32: 1834 case MVT::v4i32: OpcodeIndex = 2; break; 1835 case MVT::v2f64: 1836 case MVT::v2i64: OpcodeIndex = 3; 1837 assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); 1838 break; 1839 } 1840 1841 EVT ResTy; 1842 if (NumVecs == 1) 1843 ResTy = VT; 1844 else { 1845 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 1846 if (!is64BitVector) 1847 ResTyElts *= 2; 1848 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 1849 } 1850 std::vector<EVT> ResTys; 1851 ResTys.push_back(ResTy); 1852 if (isUpdating) 1853 ResTys.push_back(MVT::i32); 1854 ResTys.push_back(MVT::Other); 1855 1856 SDValue Pred = getAL(CurDAG, dl); 1857 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1858 SDNode *VLd; 1859 SmallVector<SDValue, 7> Ops; 1860 1861 // Double registers and VLD1/VLD2 quad registers are directly supported. 1862 if (is64BitVector || NumVecs <= 2) { 1863 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1864 QOpcodes0[OpcodeIndex]); 1865 Ops.push_back(MemAddr); 1866 Ops.push_back(Align); 1867 if (isUpdating) { 1868 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1869 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 1870 // case entirely when the rest are updated to that form, too. 1871 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) 1872 Opc = getVLDSTRegisterUpdateOpcode(Opc); 1873 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 1874 // check for that explicitly too. Horribly hacky, but temporary. 1875 if ((NumVecs > 2 && !isVLDfixed(Opc)) || 1876 !isa<ConstantSDNode>(Inc.getNode())) 1877 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); 1878 } 1879 Ops.push_back(Pred); 1880 Ops.push_back(Reg0); 1881 Ops.push_back(Chain); 1882 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1883 1884 } else { 1885 // Otherwise, quad registers are loaded with two separate instructions, 1886 // where one loads the even registers and the other loads the odd registers. 1887 EVT AddrTy = MemAddr.getValueType(); 1888 1889 // Load the even subregs. This is always an updating load, so that it 1890 // provides the address to the second load for the odd subregs. 
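// For example, a vld3.32 on quad registers becomes VLD3q32Pseudo_UPD for the even d-subregs followed by VLD3q32oddPseudo for the odd ones; the writeback result of the first instruction supplies the address operand of the second.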
1891 SDValue ImplDef = 1892 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 1893 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 1894 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 1895 ResTy, AddrTy, MVT::Other, OpsA); 1896 Chain = SDValue(VLdA, 2); 1897 1898 // Load the odd subregs. 1899 Ops.push_back(SDValue(VLdA, 1)); 1900 Ops.push_back(Align); 1901 if (isUpdating) { 1902 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1903 assert(isa<ConstantSDNode>(Inc.getNode()) && 1904 "only constant post-increment update allowed for VLD3/4"); 1905 (void)Inc; 1906 Ops.push_back(Reg0); 1907 } 1908 Ops.push_back(SDValue(VLdA, 0)); 1909 Ops.push_back(Pred); 1910 Ops.push_back(Reg0); 1911 Ops.push_back(Chain); 1912 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 1913 } 1914 1915 // Transfer memoperands. 1916 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1917 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1918 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); 1919 1920 if (NumVecs == 1) 1921 return VLd; 1922 1923 // Extract out the subregisters. 1924 SDValue SuperReg = SDValue(VLd, 0); 1925 assert(ARM::dsub_7 == ARM::dsub_0+7 && 1926 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); 1927 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); 1928 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 1929 ReplaceUses(SDValue(N, Vec), 1930 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 1931 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 1932 if (isUpdating) 1933 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 1934 return nullptr; 1935 } 1936 1937 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 1938 const uint16_t *DOpcodes, 1939 const uint16_t *QOpcodes0, 1940 const uint16_t *QOpcodes1) { 1941 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 1942 SDLoc dl(N); 1943 1944 SDValue MemAddr, Align; 1945 unsigned AddrOpIdx = isUpdating ? 1 : 2; 1946 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) 1947 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1948 return nullptr; 1949 1950 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1951 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1952 1953 SDValue Chain = N->getOperand(0); 1954 EVT VT = N->getOperand(Vec0Idx).getValueType(); 1955 bool is64BitVector = VT.is64BitVector(); 1956 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1957 1958 unsigned OpcodeIndex; 1959 switch (VT.getSimpleVT().SimpleTy) { 1960 default: llvm_unreachable("unhandled vst type"); 1961 // Double-register operations: 1962 case MVT::v8i8: OpcodeIndex = 0; break; 1963 case MVT::v4i16: OpcodeIndex = 1; break; 1964 case MVT::v2f32: 1965 case MVT::v2i32: OpcodeIndex = 2; break; 1966 case MVT::v1i64: OpcodeIndex = 3; break; 1967 // Quad-register operations: 1968 case MVT::v16i8: OpcodeIndex = 0; break; 1969 case MVT::v8i16: OpcodeIndex = 1; break; 1970 case MVT::v4f32: 1971 case MVT::v4i32: OpcodeIndex = 2; break; 1972 case MVT::v2f64: 1973 case MVT::v2i64: OpcodeIndex = 3; 1974 assert(NumVecs == 1 && "v2i64 type only supported for VST1"); 1975 break; 1976 } 1977 1978 std::vector<EVT> ResTys; 1979 if (isUpdating) 1980 ResTys.push_back(MVT::i32); 1981 ResTys.push_back(MVT::Other); 1982 1983 SDValue Pred = getAL(CurDAG, dl); 1984 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1985 SmallVector<SDValue, 7> Ops; 1986 1987 // Double registers and VST1/VST2 quad registers are directly supported. 1988 if (is64BitVector || NumVecs <= 2) { 1989 SDValue SrcReg; 1990 if (NumVecs == 1) { 1991 SrcReg = N->getOperand(Vec0Idx); 1992 } else if (is64BitVector) { 1993 // Form a REG_SEQUENCE to force register allocation. 1994 SDValue V0 = N->getOperand(Vec0Idx + 0); 1995 SDValue V1 = N->getOperand(Vec0Idx + 1); 1996 if (NumVecs == 2) 1997 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 1998 else { 1999 SDValue V2 = N->getOperand(Vec0Idx + 2); 2000 // If it's a vst3, form a quad D-register and leave the last part as 2001 // an undef. 2002 SDValue V3 = (NumVecs == 3) 2003 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2004 : N->getOperand(Vec0Idx + 3); 2005 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2006 } 2007 } else { 2008 // Form a QQ register. 2009 SDValue Q0 = N->getOperand(Vec0Idx); 2010 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2011 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2012 } 2013 2014 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2015 QOpcodes0[OpcodeIndex]); 2016 Ops.push_back(MemAddr); 2017 Ops.push_back(Align); 2018 if (isUpdating) { 2019 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2020 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 2021 // case entirely when the rest are updated to that form, too. 2022 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) 2023 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2024 // FIXME: We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so 2025 // check for that explicitly too. Horribly hacky, but temporary. 2026 if (!isa<ConstantSDNode>(Inc.getNode())) 2027 Ops.push_back(Inc); 2028 else if (NumVecs > 2 && !isVSTfixed(Opc)) 2029 Ops.push_back(Reg0); 2030 } 2031 Ops.push_back(SrcReg); 2032 Ops.push_back(Pred); 2033 Ops.push_back(Reg0); 2034 Ops.push_back(Chain); 2035 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2036 2037 // Transfer memoperands.
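// (Attaching the original MachineMemOperand keeps alias analysis and the scheduler informed about what the store actually accesses.)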
2038 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); 2039 2040 return VSt; 2041 } 2042 2043 // Otherwise, quad registers are stored with two separate instructions, 2044 // where one stores the even registers and the other stores the odd registers. 2045 2046 // Form the QQQQ REG_SEQUENCE. 2047 SDValue V0 = N->getOperand(Vec0Idx + 0); 2048 SDValue V1 = N->getOperand(Vec0Idx + 1); 2049 SDValue V2 = N->getOperand(Vec0Idx + 2); 2050 SDValue V3 = (NumVecs == 3) 2051 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2052 : N->getOperand(Vec0Idx + 3); 2053 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2054 2055 // Store the even D registers. This is always an updating store, so that it 2056 // provides the address to the second store for the odd subregs. 2057 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2058 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2059 MemAddr.getValueType(), 2060 MVT::Other, OpsA); 2061 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); 2062 Chain = SDValue(VStA, 1); 2063 2064 // Store the odd D registers. 2065 Ops.push_back(SDValue(VStA, 0)); 2066 Ops.push_back(Align); 2067 if (isUpdating) { 2068 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2069 assert(isa<ConstantSDNode>(Inc.getNode()) && 2070 "only constant post-increment update allowed for VST3/4"); 2071 (void)Inc; 2072 Ops.push_back(Reg0); 2073 } 2074 Ops.push_back(RegSeq); 2075 Ops.push_back(Pred); 2076 Ops.push_back(Reg0); 2077 Ops.push_back(Chain); 2078 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2079 Ops); 2080 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); 2081 return VStB; 2082 } 2083 2084 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, 2085 bool isUpdating, unsigned NumVecs, 2086 const uint16_t *DOpcodes, 2087 const uint16_t *QOpcodes) { 2088 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2089 SDLoc dl(N); 2090 2091 SDValue MemAddr, Align; 2092 unsigned AddrOpIdx = isUpdating ? 1 : 2; 2093 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2094 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2095 return nullptr; 2096 2097 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2098 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2099 2100 SDValue Chain = N->getOperand(0); 2101 unsigned Lane = 2102 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2103 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2104 bool is64BitVector = VT.is64BitVector(); 2105 2106 unsigned Alignment = 0; 2107 if (NumVecs != 3) { 2108 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2109 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; 2110 if (Alignment > NumBytes) 2111 Alignment = NumBytes; 2112 if (Alignment < 8 && Alignment < NumBytes) 2113 Alignment = 0; 2114 // Alignment must be a power of two; make sure of that. 
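// (x & -x) keeps only the lowest set bit, which is a power of two no larger than the original value; e.g. an alignment of 12 becomes 4.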
2115 Alignment = (Alignment & -Alignment); 2116 if (Alignment == 1) 2117 Alignment = 0; 2118 } 2119 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2120 2121 unsigned OpcodeIndex; 2122 switch (VT.getSimpleVT().SimpleTy) { 2123 default: llvm_unreachable("unhandled vld/vst lane type"); 2124 // Double-register operations: 2125 case MVT::v8i8: OpcodeIndex = 0; break; 2126 case MVT::v4i16: OpcodeIndex = 1; break; 2127 case MVT::v2f32: 2128 case MVT::v2i32: OpcodeIndex = 2; break; 2129 // Quad-register operations: 2130 case MVT::v8i16: OpcodeIndex = 0; break; 2131 case MVT::v4f32: 2132 case MVT::v4i32: OpcodeIndex = 1; break; 2133 } 2134 2135 std::vector<EVT> ResTys; 2136 if (IsLoad) { 2137 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2138 if (!is64BitVector) 2139 ResTyElts *= 2; 2140 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2141 MVT::i64, ResTyElts)); 2142 } 2143 if (isUpdating) 2144 ResTys.push_back(MVT::i32); 2145 ResTys.push_back(MVT::Other); 2146 2147 SDValue Pred = getAL(CurDAG, dl); 2148 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2149 2150 SmallVector<SDValue, 8> Ops; 2151 Ops.push_back(MemAddr); 2152 Ops.push_back(Align); 2153 if (isUpdating) { 2154 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2155 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); 2156 } 2157 2158 SDValue SuperReg; 2159 SDValue V0 = N->getOperand(Vec0Idx + 0); 2160 SDValue V1 = N->getOperand(Vec0Idx + 1); 2161 if (NumVecs == 2) { 2162 if (is64BitVector) 2163 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2164 else 2165 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2166 } else { 2167 SDValue V2 = N->getOperand(Vec0Idx + 2); 2168 SDValue V3 = (NumVecs == 3) 2169 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2170 : N->getOperand(Vec0Idx + 3); 2171 if (is64BitVector) 2172 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2173 else 2174 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2175 } 2176 Ops.push_back(SuperReg); 2177 Ops.push_back(getI32Imm(Lane, dl)); 2178 Ops.push_back(Pred); 2179 Ops.push_back(Reg0); 2180 Ops.push_back(Chain); 2181 2182 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2183 QOpcodes[OpcodeIndex]); 2184 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2185 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); 2186 if (!IsLoad) 2187 return VLdLn; 2188 2189 // Extract the subregisters. 2190 SuperReg = SDValue(VLdLn, 0); 2191 assert(ARM::dsub_7 == ARM::dsub_0+7 && 2192 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); 2193 unsigned Sub0 = is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0; 2194 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2195 ReplaceUses(SDValue(N, Vec), 2196 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2197 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2198 if (isUpdating) 2199 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2200 return nullptr; 2201 } 2202 2203 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, 2204 unsigned NumVecs, 2205 const uint16_t *Opcodes) { 2206 assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2207 SDLoc dl(N); 2208 2209 SDValue MemAddr, Align; 2210 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) 2211 return nullptr; 2212 2213 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2214 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2215 2216 SDValue Chain = N->getOperand(0); 2217 EVT VT = N->getValueType(0); 2218 2219 unsigned Alignment = 0; 2220 if (NumVecs != 3) { 2221 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2222 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; 2223 if (Alignment > NumBytes) 2224 Alignment = NumBytes; 2225 if (Alignment < 8 && Alignment < NumBytes) 2226 Alignment = 0; 2227 // Alignment must be a power of two; make sure of that. 2228 Alignment = (Alignment & -Alignment); 2229 if (Alignment == 1) 2230 Alignment = 0; 2231 } 2232 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2233 2234 unsigned OpcodeIndex; 2235 switch (VT.getSimpleVT().SimpleTy) { 2236 default: llvm_unreachable("unhandled vld-dup type"); 2237 case MVT::v8i8: OpcodeIndex = 0; break; 2238 case MVT::v4i16: OpcodeIndex = 1; break; 2239 case MVT::v2f32: 2240 case MVT::v2i32: OpcodeIndex = 2; break; 2241 } 2242 2243 SDValue Pred = getAL(CurDAG, dl); 2244 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2245 SDValue SuperReg; 2246 unsigned Opc = Opcodes[OpcodeIndex]; 2247 SmallVector<SDValue, 6> Ops; 2248 Ops.push_back(MemAddr); 2249 Ops.push_back(Align); 2250 if (isUpdating) { 2251 // fixed-stride update instructions don't have an explicit writeback 2252 // operand. It's implicit in the opcode itself. 2253 SDValue Inc = N->getOperand(2); 2254 if (!isa<ConstantSDNode>(Inc.getNode())) 2255 Ops.push_back(Inc); 2256 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 2257 else if (NumVecs > 2) 2258 Ops.push_back(Reg0); 2259 } 2260 Ops.push_back(Pred); 2261 Ops.push_back(Reg0); 2262 Ops.push_back(Chain); 2263 2264 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2265 std::vector<EVT> ResTys; 2266 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); 2267 if (isUpdating) 2268 ResTys.push_back(MVT::i32); 2269 ResTys.push_back(MVT::Other); 2270 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2271 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); 2272 SuperReg = SDValue(VLdDup, 0); 2273 2274 // Extract the subregisters. 
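// Only 64-bit vector types reach here (see the switch above), so the results are consecutive d-subregisters starting at dsub_0.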
2275 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2276 unsigned SubIdx = ARM::dsub_0; 2277 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2278 ReplaceUses(SDValue(N, Vec), 2279 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 2280 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 2281 if (isUpdating) 2282 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 2283 return nullptr; 2284 } 2285 2286 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, 2287 unsigned Opc) { 2288 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); 2289 SDLoc dl(N); 2290 EVT VT = N->getValueType(0); 2291 unsigned FirstTblReg = IsExt ? 2 : 1; 2292 2293 // Form a REG_SEQUENCE to force register allocation. 2294 SDValue RegSeq; 2295 SDValue V0 = N->getOperand(FirstTblReg + 0); 2296 SDValue V1 = N->getOperand(FirstTblReg + 1); 2297 if (NumVecs == 2) 2298 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 2299 else { 2300 SDValue V2 = N->getOperand(FirstTblReg + 2); 2301 // If it's a vtbl3, form a quad D-register and leave the last part as 2302 // an undef. 2303 SDValue V3 = (NumVecs == 3) 2304 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2305 : N->getOperand(FirstTblReg + 3); 2306 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2307 } 2308 2309 SmallVector<SDValue, 6> Ops; 2310 if (IsExt) 2311 Ops.push_back(N->getOperand(1)); 2312 Ops.push_back(RegSeq); 2313 Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); 2314 Ops.push_back(getAL(CurDAG, dl)); // predicate 2315 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register 2316 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 2317 } 2318 2319 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, 2320 bool isSigned) { 2321 if (!Subtarget->hasV6T2Ops()) 2322 return nullptr; 2323 2324 unsigned Opc = isSigned 2325 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 2326 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 2327 SDLoc dl(N); 2328 2329 // For unsigned extracts, check for a shift right and mask 2330 unsigned And_imm = 0; 2331 if (N->getOpcode() == ISD::AND) { 2332 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 2333 2334 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2335 if (And_imm & (And_imm + 1)) 2336 return nullptr; 2337 2338 unsigned Srl_imm = 0; 2339 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 2340 Srl_imm)) { 2341 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2342 2343 // Note: The width operand is encoded as width-1. 2344 unsigned Width = countTrailingOnes(And_imm) - 1; 2345 unsigned LSB = Srl_imm; 2346 2347 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2348 2349 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 2350 // It's cheaper to use a right shift to extract the top bits. 2351 if (Subtarget->isThumb()) { 2352 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 2353 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2354 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2355 getAL(CurDAG, dl), Reg0, Reg0 }; 2356 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2357 } 2358 2359 // ARM models shift instructions as MOVsi with shifter operand. 
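// For example, "lsr r0, r1, #24" is really "mov r0, r1, lsr #24", so the extract is emitted below as a MOVsi whose shifter operand encodes (srl, LSB).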
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 2361 SDValue ShOpc = 2362 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 2363 MVT::i32); 2364 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 2365 getAL(CurDAG, dl), Reg0, Reg0 }; 2366 return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 2367 } 2368 2369 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2370 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2371 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2372 getAL(CurDAG, dl), Reg0 }; 2373 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2374 } 2375 } 2376 return nullptr; 2377 } 2378 2379 // Otherwise, we're looking for a shift of a shift. 2380 unsigned Shl_imm = 0; 2381 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 2382 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 2383 unsigned Srl_imm = 0; 2384 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 2385 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2386 // Note: The width operand is encoded as width-1. 2387 unsigned Width = 32 - Srl_imm - 1; 2388 int LSB = Srl_imm - Shl_imm; 2389 if (LSB < 0) 2390 return nullptr; 2391 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2392 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2393 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2394 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2395 getAL(CurDAG, dl), Reg0 }; 2396 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2397 } 2398 } 2399 2400 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 2401 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2402 unsigned LSB = 0; 2403 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 2404 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 2405 return nullptr; 2406 2407 if (LSB + Width > 32) 2408 return nullptr; 2409 2410 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2411 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2412 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2413 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 2414 getAL(CurDAG, dl), Reg0 }; 2415 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2416 } 2417 2418 return nullptr; 2419 } 2420 2421 /// Target-specific DAG combining for ISD::XOR. 2422 /// Target-independent combining lowers SELECT_CC nodes of the form 2423 /// select_cc setg[ge] X, 0, X, -X 2424 /// select_cc setgt X, -1, X, -X 2425 /// select_cc setl[te] X, 0, -X, X 2426 /// select_cc setlt X, 1, -X, X 2427 /// which represent integer ABS, into: 2428 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 2429 /// ARM instruction selection detects the latter and matches it to the 2430 /// ARM::ABS or ARM::t2ABS machine node.
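/// For i32 that is Y = X >>s 31 (zero or all ones), and (X + Y) ^ Y yields X when X >= 0 and -X when X < 0.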
2431 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ 2432 SDValue XORSrc0 = N->getOperand(0); 2433 SDValue XORSrc1 = N->getOperand(1); 2434 EVT VT = N->getValueType(0); 2435 2436 if (Subtarget->isThumb1Only()) 2437 return nullptr; 2438 2439 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 2440 return nullptr; 2441 2442 SDValue ADDSrc0 = XORSrc0.getOperand(0); 2443 SDValue ADDSrc1 = XORSrc0.getOperand(1); 2444 SDValue SRASrc0 = XORSrc1.getOperand(0); 2445 SDValue SRASrc1 = XORSrc1.getOperand(1); 2446 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 2447 EVT XType = SRASrc0.getValueType(); 2448 unsigned Size = XType.getSizeInBits() - 1; 2449 2450 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 2451 XType.isInteger() && SRAConstant != nullptr && 2452 Size == SRAConstant->getZExtValue()) { 2453 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 2454 return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 2455 } 2456 2457 return nullptr; 2458 } 2459 2460 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { 2461 // The only time a CONCAT_VECTORS operation can have legal types is when 2462 // two 64-bit vectors are concatenated to a 128-bit vector. 2463 EVT VT = N->getValueType(0); 2464 if (!VT.is128BitVector() || N->getNumOperands() != 2) 2465 llvm_unreachable("unexpected CONCAT_VECTORS"); 2466 return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); 2467 } 2468 2469 SDNode *ARMDAGToDAGISel::Select(SDNode *N) { 2470 SDLoc dl(N); 2471 2472 if (N->isMachineOpcode()) { 2473 N->setNodeId(-1); 2474 return nullptr; // Already selected. 2475 } 2476 2477 switch (N->getOpcode()) { 2478 default: break; 2479 case ISD::WRITE_REGISTER: { 2480 SDNode *ResNode = SelectWriteRegister(N); 2481 if (ResNode) 2482 return ResNode; 2483 break; 2484 } 2485 case ISD::READ_REGISTER: { 2486 SDNode *ResNode = SelectReadRegister(N); 2487 if (ResNode) 2488 return ResNode; 2489 break; 2490 } 2491 case ISD::INLINEASM: { 2492 SDNode *ResNode = SelectInlineAsm(N); 2493 if (ResNode) 2494 return ResNode; 2495 break; 2496 } 2497 case ISD::XOR: { 2498 // Select special operations if XOR node forms integer ABS pattern 2499 SDNode *ResNode = SelectABSOp(N); 2500 if (ResNode) 2501 return ResNode; 2502 // Other cases are autogenerated. 2503 break; 2504 } 2505 case ISD::Constant: { 2506 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2507 // If we can't materialize the constant we need to use a literal pool 2508 if (ConstantMaterializationCost(Val) > 2) { 2509 SDValue CPIdx = CurDAG->getTargetConstantPool( 2510 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2511 TLI->getPointerTy(CurDAG->getDataLayout())); 2512 2513 SDNode *ResNode; 2514 if (Subtarget->isThumb()) { 2515 SDValue Pred = getAL(CurDAG, dl); 2516 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2517 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; 2518 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2519 Ops); 2520 } else { 2521 SDValue Ops[] = { 2522 CPIdx, 2523 CurDAG->getTargetConstant(0, dl, MVT::i32), 2524 getAL(CurDAG, dl), 2525 CurDAG->getRegister(0, MVT::i32), 2526 CurDAG->getEntryNode() 2527 }; 2528 ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2529 Ops); 2530 } 2531 ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); 2532 return nullptr; 2533 } 2534 2535 // Other cases are autogenerated. 2536 break; 2537 } 2538 case ISD::FrameIndex: { 2539 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
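// The zero immediate is a placeholder; frame index elimination later folds the object's real offset from the stack or frame pointer into it.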
2540 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2541 SDValue TFI = CurDAG->getTargetFrameIndex( 2542 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2543 if (Subtarget->isThumb1Only()) { 2544 // Set the alignment of the frame object to 4, to avoid having to generate 2545 // more than one ADD. 2546 MachineFrameInfo *MFI = MF->getFrameInfo(); 2547 if (MFI->getObjectAlignment(FI) < 4) 2548 MFI->setObjectAlignment(FI, 4); 2549 return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2550 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2551 } else { 2552 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 2553 ARM::t2ADDri : ARM::ADDri); 2554 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2555 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2556 CurDAG->getRegister(0, MVT::i32) }; 2557 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2558 } 2559 } 2560 case ISD::SRL: 2561 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) 2562 return I; 2563 break; 2564 case ISD::SIGN_EXTEND_INREG: 2565 case ISD::SRA: 2566 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) 2567 return I; 2568 break; 2569 case ISD::MUL: 2570 if (Subtarget->isThumb1Only()) 2571 break; 2572 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2573 unsigned RHSV = C->getZExtValue(); 2574 if (!RHSV) break; 2575 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 2576 unsigned ShImm = Log2_32(RHSV-1); 2577 if (ShImm >= 32) 2578 break; 2579 SDValue V = N->getOperand(0); 2580 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2581 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2582 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2583 if (Subtarget->isThumb()) { 2584 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2585 return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2586 } else { 2587 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2588 Reg0 }; 2589 return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2590 } 2591 } 2592 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 2593 unsigned ShImm = Log2_32(RHSV+1); 2594 if (ShImm >= 32) 2595 break; 2596 SDValue V = N->getOperand(0); 2597 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2598 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2599 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2600 if (Subtarget->isThumb()) { 2601 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2602 return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2603 } else { 2604 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2605 Reg0 }; 2606 return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2607 } 2608 } 2609 } 2610 break; 2611 case ISD::AND: { 2612 // Check for unsigned bitfield extract 2613 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) 2614 return I; 2615 2616 // (and (or x, c2), c1): the top 16 bits of c1 and c2 match, the lower 16 2617 // bits of c1 are 0xffff, and the lower 16 bits of c2 are 0. That is, the 2618 // top 16 bits are entirely contributed by c2 and the lower 16 bits are 2619 // entirely contributed by x. That's equal to (or (and x, 0xffff), (and c2, 0xffff0000)). 2620 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)". 2621 EVT VT = N->getValueType(0); 2622 if (VT != MVT::i32) 2623 break; 2624 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2625 ? ARM::t2MOVTi16 2626 : (Subtarget->hasV6T2Ops() ?
ARM::MOVTi16 : 0); 2627 if (!Opc) 2628 break; 2629 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2630 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2631 if (!N1C) 2632 break; 2633 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2634 SDValue N2 = N0.getOperand(1); 2635 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2636 if (!N2C) 2637 break; 2638 unsigned N1CVal = N1C->getZExtValue(); 2639 unsigned N2CVal = N2C->getZExtValue(); 2640 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2641 (N1CVal & 0xffffU) == 0xffffU && 2642 (N2CVal & 0xffffU) == 0x0U) { 2643 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2644 dl, MVT::i32); 2645 SDValue Ops[] = { N0.getOperand(0), Imm16, 2646 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2647 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 2648 } 2649 } 2650 break; 2651 } 2652 case ARMISD::VMOVRRD: 2653 return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, 2654 N->getOperand(0), getAL(CurDAG, dl), 2655 CurDAG->getRegister(0, MVT::i32)); 2656 case ISD::UMUL_LOHI: { 2657 if (Subtarget->isThumb1Only()) 2658 break; 2659 if (Subtarget->isThumb()) { 2660 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2661 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2662 return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops); 2663 } else { 2664 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2665 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2666 CurDAG->getRegister(0, MVT::i32) }; 2667 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 2668 ARM::UMULL : ARM::UMULLv5, 2669 dl, MVT::i32, MVT::i32, Ops); 2670 } 2671 } 2672 case ISD::SMUL_LOHI: { 2673 if (Subtarget->isThumb1Only()) 2674 break; 2675 if (Subtarget->isThumb()) { 2676 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2677 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2678 return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops); 2679 } else { 2680 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2681 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2682 CurDAG->getRegister(0, MVT::i32) }; 2683 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 2684 ARM::SMULL : ARM::SMULLv5, 2685 dl, MVT::i32, MVT::i32, Ops); 2686 } 2687 } 2688 case ARMISD::UMLAL:{ 2689 if (Subtarget->isThumb()) { 2690 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2691 N->getOperand(3), getAL(CurDAG, dl), 2692 CurDAG->getRegister(0, MVT::i32)}; 2693 return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops); 2694 }else{ 2695 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2696 N->getOperand(3), getAL(CurDAG, dl), 2697 CurDAG->getRegister(0, MVT::i32), 2698 CurDAG->getRegister(0, MVT::i32) }; 2699 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 2700 ARM::UMLAL : ARM::UMLALv5, 2701 dl, MVT::i32, MVT::i32, Ops); 2702 } 2703 } 2704 case ARMISD::SMLAL:{ 2705 if (Subtarget->isThumb()) { 2706 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2707 N->getOperand(3), getAL(CurDAG, dl), 2708 CurDAG->getRegister(0, MVT::i32)}; 2709 return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops); 2710 }else{ 2711 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2712 N->getOperand(3), getAL(CurDAG, dl), 2713 CurDAG->getRegister(0, MVT::i32), 2714 CurDAG->getRegister(0, MVT::i32) }; 2715 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
2716 ARM::SMLAL : ARM::SMLALv5, 2717 dl, MVT::i32, MVT::i32, Ops); 2718 } 2719 } 2720 case ISD::LOAD: { 2721 SDNode *ResNode = nullptr; 2722 if (Subtarget->isThumb() && Subtarget->hasThumb2()) 2723 ResNode = SelectT2IndexedLoad(N); 2724 else 2725 ResNode = SelectARMIndexedLoad(N); 2726 if (ResNode) 2727 return ResNode; 2728 // Other cases are autogenerated. 2729 break; 2730 } 2731 case ARMISD::BRCOND: { 2732 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2733 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2734 // Pattern complexity = 6 cost = 1 size = 0 2735 2736 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2737 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 2738 // Pattern complexity = 6 cost = 1 size = 0 2739 2740 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2741 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2742 // Pattern complexity = 6 cost = 1 size = 0 2743 2744 unsigned Opc = Subtarget->isThumb() ? 2745 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 2746 SDValue Chain = N->getOperand(0); 2747 SDValue N1 = N->getOperand(1); 2748 SDValue N2 = N->getOperand(2); 2749 SDValue N3 = N->getOperand(3); 2750 SDValue InFlag = N->getOperand(4); 2751 assert(N1.getOpcode() == ISD::BasicBlock); 2752 assert(N2.getOpcode() == ISD::Constant); 2753 assert(N3.getOpcode() == ISD::Register); 2754 2755 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) 2756 cast<ConstantSDNode>(N2)->getZExtValue()), dl, 2757 MVT::i32); 2758 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 2759 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 2760 MVT::Glue, Ops); 2761 Chain = SDValue(ResNode, 0); 2762 if (N->getNumValues() == 2) { 2763 InFlag = SDValue(ResNode, 1); 2764 ReplaceUses(SDValue(N, 1), InFlag); 2765 } 2766 ReplaceUses(SDValue(N, 0), 2767 SDValue(Chain.getNode(), Chain.getResNo())); 2768 return nullptr; 2769 } 2770 case ARMISD::VZIP: { 2771 unsigned Opc = 0; 2772 EVT VT = N->getValueType(0); 2773 switch (VT.getSimpleVT().SimpleTy) { 2774 default: return nullptr; 2775 case MVT::v8i8: Opc = ARM::VZIPd8; break; 2776 case MVT::v4i16: Opc = ARM::VZIPd16; break; 2777 case MVT::v2f32: 2778 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 2779 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2780 case MVT::v16i8: Opc = ARM::VZIPq8; break; 2781 case MVT::v8i16: Opc = ARM::VZIPq16; break; 2782 case MVT::v4f32: 2783 case MVT::v4i32: Opc = ARM::VZIPq32; break; 2784 } 2785 SDValue Pred = getAL(CurDAG, dl); 2786 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2787 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2788 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); 2789 } 2790 case ARMISD::VUZP: { 2791 unsigned Opc = 0; 2792 EVT VT = N->getValueType(0); 2793 switch (VT.getSimpleVT().SimpleTy) { 2794 default: return nullptr; 2795 case MVT::v8i8: Opc = ARM::VUZPd8; break; 2796 case MVT::v4i16: Opc = ARM::VUZPd16; break; 2797 case MVT::v2f32: 2798 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
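// (With only two 32-bit lanes per d-register, zip, unzip and transpose all perform the same element exchange.)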
2799 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2800 case MVT::v16i8: Opc = ARM::VUZPq8; break; 2801 case MVT::v8i16: Opc = ARM::VUZPq16; break; 2802 case MVT::v4f32: 2803 case MVT::v4i32: Opc = ARM::VUZPq32; break; 2804 } 2805 SDValue Pred = getAL(CurDAG, dl); 2806 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2807 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2808 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); 2809 } 2810 case ARMISD::VTRN: { 2811 unsigned Opc = 0; 2812 EVT VT = N->getValueType(0); 2813 switch (VT.getSimpleVT().SimpleTy) { 2814 default: return nullptr; 2815 case MVT::v8i8: Opc = ARM::VTRNd8; break; 2816 case MVT::v4i16: Opc = ARM::VTRNd16; break; 2817 case MVT::v2f32: 2818 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2819 case MVT::v16i8: Opc = ARM::VTRNq8; break; 2820 case MVT::v8i16: Opc = ARM::VTRNq16; break; 2821 case MVT::v4f32: 2822 case MVT::v4i32: Opc = ARM::VTRNq32; break; 2823 } 2824 SDValue Pred = getAL(CurDAG, dl); 2825 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2826 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2827 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); 2828 } 2829 case ARMISD::BUILD_VECTOR: { 2830 EVT VecVT = N->getValueType(0); 2831 EVT EltVT = VecVT.getVectorElementType(); 2832 unsigned NumElts = VecVT.getVectorNumElements(); 2833 if (EltVT == MVT::f64) { 2834 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 2835 return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); 2836 } 2837 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 2838 if (NumElts == 2) 2839 return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); 2840 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 2841 return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 2842 N->getOperand(2), N->getOperand(3)); 2843 } 2844 2845 case ARMISD::VLD2DUP: { 2846 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 2847 ARM::VLD2DUPd32 }; 2848 return SelectVLDDup(N, false, 2, Opcodes); 2849 } 2850 2851 case ARMISD::VLD3DUP: { 2852 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 2853 ARM::VLD3DUPd16Pseudo, 2854 ARM::VLD3DUPd32Pseudo }; 2855 return SelectVLDDup(N, false, 3, Opcodes); 2856 } 2857 2858 case ARMISD::VLD4DUP: { 2859 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 2860 ARM::VLD4DUPd16Pseudo, 2861 ARM::VLD4DUPd32Pseudo }; 2862 return SelectVLDDup(N, false, 4, Opcodes); 2863 } 2864 2865 case ARMISD::VLD2DUP_UPD: { 2866 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 2867 ARM::VLD2DUPd16wb_fixed, 2868 ARM::VLD2DUPd32wb_fixed }; 2869 return SelectVLDDup(N, true, 2, Opcodes); 2870 } 2871 2872 case ARMISD::VLD3DUP_UPD: { 2873 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 2874 ARM::VLD3DUPd16Pseudo_UPD, 2875 ARM::VLD3DUPd32Pseudo_UPD }; 2876 return SelectVLDDup(N, true, 3, Opcodes); 2877 } 2878 2879 case ARMISD::VLD4DUP_UPD: { 2880 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 2881 ARM::VLD4DUPd16Pseudo_UPD, 2882 ARM::VLD4DUPd32Pseudo_UPD }; 2883 return SelectVLDDup(N, true, 4, Opcodes); 2884 } 2885 2886 case ARMISD::VLD1_UPD: { 2887 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 2888 ARM::VLD1d16wb_fixed, 2889 ARM::VLD1d32wb_fixed, 2890 ARM::VLD1d64wb_fixed }; 2891 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 2892 ARM::VLD1q16wb_fixed, 2893 ARM::VLD1q32wb_fixed, 2894 ARM::VLD1q64wb_fixed }; 2895 return SelectVLD(N, true, 1, DOpcodes, 
QOpcodes, nullptr); 2896 } 2897 2898 case ARMISD::VLD2_UPD: { 2899 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 2900 ARM::VLD2d16wb_fixed, 2901 ARM::VLD2d32wb_fixed, 2902 ARM::VLD1q64wb_fixed}; 2903 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 2904 ARM::VLD2q16PseudoWB_fixed, 2905 ARM::VLD2q32PseudoWB_fixed }; 2906 return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 2907 } 2908 2909 case ARMISD::VLD3_UPD: { 2910 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 2911 ARM::VLD3d16Pseudo_UPD, 2912 ARM::VLD3d32Pseudo_UPD, 2913 ARM::VLD1d64TPseudoWB_fixed}; 2914 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 2915 ARM::VLD3q16Pseudo_UPD, 2916 ARM::VLD3q32Pseudo_UPD }; 2917 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 2918 ARM::VLD3q16oddPseudo_UPD, 2919 ARM::VLD3q32oddPseudo_UPD }; 2920 return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 2921 } 2922 2923 case ARMISD::VLD4_UPD: { 2924 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 2925 ARM::VLD4d16Pseudo_UPD, 2926 ARM::VLD4d32Pseudo_UPD, 2927 ARM::VLD1d64QPseudoWB_fixed}; 2928 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 2929 ARM::VLD4q16Pseudo_UPD, 2930 ARM::VLD4q32Pseudo_UPD }; 2931 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 2932 ARM::VLD4q16oddPseudo_UPD, 2933 ARM::VLD4q32oddPseudo_UPD }; 2934 return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 2935 } 2936 2937 case ARMISD::VLD2LN_UPD: { 2938 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 2939 ARM::VLD2LNd16Pseudo_UPD, 2940 ARM::VLD2LNd32Pseudo_UPD }; 2941 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 2942 ARM::VLD2LNq32Pseudo_UPD }; 2943 return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 2944 } 2945 2946 case ARMISD::VLD3LN_UPD: { 2947 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 2948 ARM::VLD3LNd16Pseudo_UPD, 2949 ARM::VLD3LNd32Pseudo_UPD }; 2950 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 2951 ARM::VLD3LNq32Pseudo_UPD }; 2952 return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 2953 } 2954 2955 case ARMISD::VLD4LN_UPD: { 2956 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 2957 ARM::VLD4LNd16Pseudo_UPD, 2958 ARM::VLD4LNd32Pseudo_UPD }; 2959 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 2960 ARM::VLD4LNq32Pseudo_UPD }; 2961 return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 2962 } 2963 2964 case ARMISD::VST1_UPD: { 2965 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 2966 ARM::VST1d16wb_fixed, 2967 ARM::VST1d32wb_fixed, 2968 ARM::VST1d64wb_fixed }; 2969 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 2970 ARM::VST1q16wb_fixed, 2971 ARM::VST1q32wb_fixed, 2972 ARM::VST1q64wb_fixed }; 2973 return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 2974 } 2975 2976 case ARMISD::VST2_UPD: { 2977 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 2978 ARM::VST2d16wb_fixed, 2979 ARM::VST2d32wb_fixed, 2980 ARM::VST1q64wb_fixed}; 2981 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 2982 ARM::VST2q16PseudoWB_fixed, 2983 ARM::VST2q32PseudoWB_fixed }; 2984 return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 2985 } 2986 2987 case ARMISD::VST3_UPD: { 2988 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 2989 ARM::VST3d16Pseudo_UPD, 2990 ARM::VST3d32Pseudo_UPD, 2991 ARM::VST1d64TPseudoWB_fixed}; 2992 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 2993 
ARM::VST3q16Pseudo_UPD, 2994 ARM::VST3q32Pseudo_UPD }; 2995 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 2996 ARM::VST3q16oddPseudo_UPD, 2997 ARM::VST3q32oddPseudo_UPD }; 2998 return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 2999 } 3000 3001 case ARMISD::VST4_UPD: { 3002 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3003 ARM::VST4d16Pseudo_UPD, 3004 ARM::VST4d32Pseudo_UPD, 3005 ARM::VST1d64QPseudoWB_fixed}; 3006 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3007 ARM::VST4q16Pseudo_UPD, 3008 ARM::VST4q32Pseudo_UPD }; 3009 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3010 ARM::VST4q16oddPseudo_UPD, 3011 ARM::VST4q32oddPseudo_UPD }; 3012 return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3013 } 3014 3015 case ARMISD::VST2LN_UPD: { 3016 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3017 ARM::VST2LNd16Pseudo_UPD, 3018 ARM::VST2LNd32Pseudo_UPD }; 3019 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3020 ARM::VST2LNq32Pseudo_UPD }; 3021 return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3022 } 3023 3024 case ARMISD::VST3LN_UPD: { 3025 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3026 ARM::VST3LNd16Pseudo_UPD, 3027 ARM::VST3LNd32Pseudo_UPD }; 3028 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3029 ARM::VST3LNq32Pseudo_UPD }; 3030 return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3031 } 3032 3033 case ARMISD::VST4LN_UPD: { 3034 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3035 ARM::VST4LNd16Pseudo_UPD, 3036 ARM::VST4LNd32Pseudo_UPD }; 3037 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3038 ARM::VST4LNq32Pseudo_UPD }; 3039 return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3040 } 3041 3042 case ISD::INTRINSIC_VOID: 3043 case ISD::INTRINSIC_W_CHAIN: { 3044 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3045 switch (IntNo) { 3046 default: 3047 break; 3048 3049 case Intrinsic::arm_ldaexd: 3050 case Intrinsic::arm_ldrexd: { 3051 SDLoc dl(N); 3052 SDValue Chain = N->getOperand(0); 3053 SDValue MemAddr = N->getOperand(2); 3054 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3055 3056 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3057 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3058 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 3059 3060 // arm_ldrexd returns an i64 value in {i32, i32}. 3061 std::vector<EVT> ResTys; 3062 if (isThumb) { 3063 ResTys.push_back(MVT::i32); 3064 ResTys.push_back(MVT::i32); 3065 } else 3066 ResTys.push_back(MVT::Untyped); 3067 ResTys.push_back(MVT::Other); 3068 3069 // Place arguments in the right order. 3070 SmallVector<SDValue, 7> Ops; 3071 Ops.push_back(MemAddr); 3072 Ops.push_back(getAL(CurDAG, dl)); 3073 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3074 Ops.push_back(Chain); 3075 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3076 // Transfer memoperands. 3077 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3078 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3079 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 3080 3081 // Remap uses. 3082 SDValue OutChain = isThumb ?
SDValue(Ld, 2) : SDValue(Ld, 1); 3083 if (!SDValue(N, 0).use_empty()) { 3084 SDValue Result; 3085 if (isThumb) 3086 Result = SDValue(Ld, 0); 3087 else { 3088 SDValue SubRegIdx = 3089 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3090 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3091 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3092 Result = SDValue(ResNode, 0); 3093 } 3094 ReplaceUses(SDValue(N, 0), Result); 3095 } 3096 if (!SDValue(N, 1).use_empty()) { 3097 SDValue Result; 3098 if (isThumb) 3099 Result = SDValue(Ld, 1); 3100 else { 3101 SDValue SubRegIdx = 3102 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3103 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3104 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3105 Result = SDValue(ResNode, 0); 3106 } 3107 ReplaceUses(SDValue(N, 1), Result); 3108 } 3109 ReplaceUses(SDValue(N, 2), OutChain); 3110 return nullptr; 3111 } 3112 case Intrinsic::arm_stlexd: 3113 case Intrinsic::arm_strexd: { 3114 SDLoc dl(N); 3115 SDValue Chain = N->getOperand(0); 3116 SDValue Val0 = N->getOperand(2); 3117 SDValue Val1 = N->getOperand(3); 3118 SDValue MemAddr = N->getOperand(4); 3119 3120 // Store exclusive double returns an i32 value which is the return status 3121 // of the issued store. 3122 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3123 3124 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3125 // Place arguments in the right order. 3126 SmallVector<SDValue, 7> Ops; 3127 if (isThumb) { 3128 Ops.push_back(Val0); 3129 Ops.push_back(Val1); 3130 } else 3131 // arm_strexd uses GPRPair. 3132 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3133 Ops.push_back(MemAddr); 3134 Ops.push_back(getAL(CurDAG, dl)); 3135 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3136 Ops.push_back(Chain); 3137 3138 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3139 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 3140 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 3141 3142 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3143 // Transfer memoperands.
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);

      return St;
    }
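
    // Each opcode table below is indexed by the vector element size (8, 16,
    // 32 or 64 bits). SelectVLD/SelectVST choose between the D- and
    // Q-register tables based on the vector type; the 3- and 4-element
    // Q-register accesses are split into two instructions covering the even
    // and odd D registers, hence the paired QOpcodes0/QOpcodes1 tables.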
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_neon_vtbl2:
      return SelectVTBL(N, false, 2, ARM::VTBL2);
    case Intrinsic::arm_neon_vtbl3:
      return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
    case Intrinsic::arm_neon_vtbl4:
      return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);

    case Intrinsic::arm_neon_vtbx2:
      return SelectVTBL(N, true, 2, ARM::VTBX2);
    case Intrinsic::arm_neon_vtbx3:
      return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
    case Intrinsic::arm_neon_vtbx4:
      return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
    }
    break;
  }

  case ARMISD::VTBL1: {
    SDLoc dl(N);
    EVT VT = N->getValueType(0);
    SmallVector<SDValue, 6> Ops;

    Ops.push_back(N->getOperand(0));
    Ops.push_back(N->getOperand(1));
    Ops.push_back(getAL(CurDAG, dl));                // Predicate
    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
  }
  case ARMISD::VTBL2: {
    SDLoc dl(N);
    EVT VT = N->getValueType(0);

    // Form a REG_SEQUENCE to force register allocation.
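    // VTBL2 reads its table from two consecutive D registers, and the
    // REG_SEQUENCE built by createDRegPairNode forces that allocation.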
    SDValue V0 = N->getOperand(0);
    SDValue V1 = N->getOperand(1);
    SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);

    SmallVector<SDValue, 6> Ops;
    Ops.push_back(RegSeq);
    Ops.push_back(N->getOperand(2));
    Ops.push_back(getAL(CurDAG, dl));                // Predicate
    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
  }

  case ISD::CONCAT_VECTORS:
    return SelectConcatVector(N);
  }

  return SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32-bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64-bit), extract the integer operands from
// its fields, and add them to the provided vector.
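// For example, the string "cp15:0:c13:c0:3" yields the five integer operands
// {15, 0, 13, 0, 3}.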
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG, SDLoc DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked
// Register mask operand, which expresses which register is to be used, e.g.
// r8, and in which mode it is to be used, e.g. usr. Returns -1 to signify
// that the string was invalid.
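// For example, "r8_usr" maps to 0x00 and "spsr_fiq" to 0x2e.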
static inline int getBankedRegisterMask(StringRef RegString) {
  return StringSwitch<int>(RegString.lower())
      .Case("r8_usr", 0x00)
      .Case("r9_usr", 0x01)
      .Case("r10_usr", 0x02)
      .Case("r11_usr", 0x03)
      .Case("r12_usr", 0x04)
      .Case("sp_usr", 0x05)
      .Case("lr_usr", 0x06)
      .Case("r8_fiq", 0x08)
      .Case("r9_fiq", 0x09)
      .Case("r10_fiq", 0x0a)
      .Case("r11_fiq", 0x0b)
      .Case("r12_fiq", 0x0c)
      .Case("sp_fiq", 0x0d)
      .Case("lr_fiq", 0x0e)
      .Case("lr_irq", 0x10)
      .Case("sp_irq", 0x11)
      .Case("lr_svc", 0x12)
      .Case("sp_svc", 0x13)
      .Case("lr_abt", 0x14)
      .Case("sp_abt", 0x15)
      .Case("lr_und", 0x16)
      .Case("sp_und", 0x17)
      .Case("lr_mon", 0x1c)
      .Case("sp_mon", 0x1d)
      .Case("elr_hyp", 0x1e)
      .Case("sp_hyp", 0x1f)
      .Case("spsr_fiq", 0x2e)
      .Case("spsr_irq", 0x30)
      .Case("spsr_svc", 0x32)
      .Case("spsr_abt", 0x34)
      .Case("spsr_und", 0x36)
      .Case("spsr_mon", 0x3c)
      .Case("spsr_hyp", 0x3e)
      .Default(-1);
}

// Maps an M Class special register string to its value for use in the
// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
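// For example, "msp" maps to SYSm value 0x8 and "faultmask" to 0x13.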
static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
  return StringSwitch<int>(RegString.lower())
      .Case("apsr", 0x0)
      .Case("iapsr", 0x1)
      .Case("eapsr", 0x2)
      .Case("xpsr", 0x3)
      .Case("ipsr", 0x5)
      .Case("epsr", 0x6)
      .Case("iepsr", 0x7)
      .Case("msp", 0x8)
      .Case("psp", 0x9)
      .Case("primask", 0x10)
      .Case("basepri", 0x11)
      .Case("basepri_max", 0x12)
      .Case("faultmask", 0x13)
      .Case("control", 0x14)
      .Default(-1);
}

// The flags here are common to those allowed for apsr in the A class cores
// and those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, or -1 if invalid.
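// An empty flags string is treated as "nzcvq" (0x2), with the g bit (0x1)
// added when the DSP extension is present.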
static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
  if (Flags.empty())
    return 0x2 | (int)hasDSP;

  return StringSwitch<int>(Flags)
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
                                 const ARMSubtarget *Subtarget) {
  // Ensure that the register (without flags) was a valid M Class special
  // register.
  int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
  if (SYSmvalue == -1)
    return -1;

  // basepri, basepri_max and faultmask are only valid for V7m.
  if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
    return -1;

  // If it was a read then we won't be expecting flags, so at this point
  // we can return the mask.
  if (IsRead) {
    assert(Flags.empty() && "Unexpected flags for reading M class register.");
    return SYSmvalue;
  }

  // We know we are now handling a write, so we need to get the mask for the
  // flags.
  int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());

  // Only apsr, iapsr, eapsr and xpsr can have flags. The other register
  // values shouldn't have flags present.
  if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
    return -1;

  // The _g and _nzcvqg versions are only valid if the DSP extension is
  // available.
  if (!Subtarget->hasDSP() && (Mask & 0x1))
    return -1;

  // The register was valid, so we need to put the mask in the correct place
  // (the flags need to be in bits 11-10) and combine it with the SYSmvalue to
  // construct the operand for the instruction node.
  if (SYSmvalue < 0x4)
    return SYSmvalue | Mask << 10;

  return SYSmvalue;
}
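
// For example, "spsr" with flags "fc" yields 0x19: the R bit (0x10) plus the
// f (0x8) and c (0x1) field bits.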
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags, true);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to construct as operands for the node.
SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower it to an MRC node (32-bit) or an
    // MRRC node (64-bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops);
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on
  // the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
      .Case("fpscr", ARM::VMRS)
      .Case("fpexc", ARM::VMRS_FPEXC)
      .Case("fpsid", ARM::VMRS_FPSID)
      .Case("mvfr0", ARM::VMRS_MVFR0)
      .Case("mvfr1", ARM::VMRS_MVFR1)
      .Case("mvfr2", ARM::VMRS_MVFR2)
      .Case("fpinst", ARM::VMRS_FPINST)
      .Case("fpinst2", ARM::VMRS_FPINST2)
      .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return nullptr;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return nullptr;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
  }

  // Here we know the target is not M Class, so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
                                  MVT::i32, MVT::Other, Ops);
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops);
  }

  return nullptr;
}

// Lower the write_register intrinsic to ARM specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to use in the nodes.
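// For example, a write_register of the string "spsr_fiq" lowers to an
// MSRbanked node whose banked-register operand is 0x2e, per the table above.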
SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower it to an MCR node (32-bit) or an
    // MCRR node (64-bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops);
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
      .Case("fpscr", ARM::VMSR)
      .Case("fpexc", ARM::VMSR_FPEXC)
      .Case("fpsid", ARM::VMSR_FPSID)
      .Case("fpinst", ARM::VMSR_FPINST)
      .Case("fpinst2", ARM::VMSR_FPINST2)
      .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return nullptr;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }

  SmallVector<StringRef, 5> Fields;
  StringRef(SpecialReg).split(Fields, '_', 1, false);
  std::string Reg = Fields[0].str();
  StringRef Flags = Fields.size() == 2 ? Fields[1] : "";

  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    // basepri_max gets split, so we need to correct Reg and Flags.
    if (SpecialReg == "basepri_max") {
      Reg = SpecialReg;
      Flags = "";
    }
    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                  DL, MVT::Other, Ops);
  }

  return nullptr;
}

SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by the H, Q, R modifiers, so
  // we still pack them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr, 0);

  SmallVector<bool, 8> OpChanged;
  // The glue node will be appended later.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    } else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get
    // here and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return nullptr;

  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  return New.getNode();
}

bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    // be an immediate and not a memory constraint.
    // Fallthrough.
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}