//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
#include "MipsSEISelLowering.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
                    cl::desc("MIPS: Enable tail calls."), cl::init(false));

// When set, f64 loads/stores are custom-lowered (see the NoDPLoadStore block
// in the constructor) instead of using ldc1/sdc1.
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

// Set up register classes and legalization actions for the base GPR/FPU
// pipeline, the optional DSP(r2) and MSA vector extensions, and the
// MIPS32r6/MIPS64r6 ISA revisions.
MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
  : MipsTargetLowering(TM) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget->isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
    // Expand all truncating stores and extending loads.
    unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;

    for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
      for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
        setTruncStoreAction((MVT::SimpleValueType)VT0,
                            (MVT::SimpleValueType)VT1, Expand);

      setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
      setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
    }
  }

  if (Subtarget->hasDSP()) {
    // The DSP ASE provides 2x16-bit and 4x8-bit vectors in GPRs.
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      // ... then mark the handful of DSP-native operations legal again.
      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    // DAG combines that map shifts/compares/selects onto DSP nodes.
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);
  }

  if (Subtarget->hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget->hasMSA()) {
    // 128-bit MSA vector types; see addMSAIntType/addMSAFloatType below.
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }

  if (!Subtarget->mipsSEUsesSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget->isSingleFloat()) {
      if (Subtarget->isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  // Accumulator-based multiplies are custom-lowered to MipsISD nodes that
  // model the HI/LO register pair (see lowerMulDiv via LowerOperation).
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget->hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget->isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget->isGP64bit()) {
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  // Combines that form MADD/MSUB and strength-reduce constant multiplies.
  setTargetDAGCombine(ISD::ADDE);
  setTargetDAGCombine(ISD::SUBE);
  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (NoDPLoadStore) {
    // -mno-ldc1-sdc1: expand f64 accesses per the option's description.
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget->hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget->isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget->hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  // Must run after all register classes and actions have been registered.
  computeRegisterProperties();
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
  return new MipsSETargetLowering(TM);
}

// MVT::Untyped is used for the HI/LO accumulator pair; pick the DSP
// accumulator class when the DSP ASE is available.
const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  // int<->fp conversions only exist for element sizes with a matching
  // floating-point type (32- and 64-bit lanes).
  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  // Only a subset of condition codes map directly to MSA compares; the rest
  // are expanded in terms of these.
  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  // v8f16 only supports the move/load/store operations above; there is no
  // f16 arithmetic here.
  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    // Greater-than style compares are expanded in terms of the legal
    // less-than style ones.
    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}

// Report whether an unaligned access of type VT is acceptable (and fast) on
// this subtarget. i32/i64 accesses are always claimed to be handled (via
// lwl/lwr-style lowering or hardware support).
bool
MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                    unsigned,
                                                    bool *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget->systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation defined whether this is handled by hardware, software, or
    // a hybrid of the two but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = true;
    return true;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = true;
    return true;
  default:
    return false;
  }
}

// Dispatch the operations marked Custom in the constructor to their lowering
// helpers; anything unhandled falls through to the base class.
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch(Op.getOpcode()) {
  case ISD::LOAD:  return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// selectMADD -
// Transforms a subgraph in CurDAG if the following pattern is found:
//  (addc multLo, Lo0), (adde multHi, Hi0),
// where,
//  multHi/Lo: product of multiplication
//  Lo0: initial value of Lo register
//  Hi0: initial value of Hi register
// Return true if pattern matching was successful.
if pattern matching was successful. 388 static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { 389 // ADDENode's second operand must be a flag output of an ADDC node in order 390 // for the matching to be successful. 391 SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); 392 393 if (ADDCNode->getOpcode() != ISD::ADDC) 394 return false; 395 396 SDValue MultHi = ADDENode->getOperand(0); 397 SDValue MultLo = ADDCNode->getOperand(0); 398 SDNode *MultNode = MultHi.getNode(); 399 unsigned MultOpc = MultHi.getOpcode(); 400 401 // MultHi and MultLo must be generated by the same node, 402 if (MultLo.getNode() != MultNode) 403 return false; 404 405 // and it must be a multiplication. 406 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 407 return false; 408 409 // MultLo amd MultHi must be the first and second output of MultNode 410 // respectively. 411 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 412 return false; 413 414 // Transform this to a MADD only if ADDENode and ADDCNode are the only users 415 // of the values of MultNode, in which case MultNode will be removed in later 416 // phases. 417 // If there exist users other than ADDENode or ADDCNode, this function returns 418 // here, which will result in MultNode being mapped to a single MULT 419 // instruction node rather than a pair of MULT and MADD instructions being 420 // produced. 421 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 422 return false; 423 424 SDLoc DL(ADDENode); 425 426 // Initialize accumulator. 427 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 428 ADDCNode->getOperand(1), 429 ADDENode->getOperand(1)); 430 431 // create MipsMAdd(u) node 432 MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MAddu : MipsISD::MAdd; 433 434 SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, 435 MultNode->getOperand(0),// Factor 0 436 MultNode->getOperand(1),// Factor 1 437 ACCIn); 438 439 // replace uses of adde and addc here 440 if (!SDValue(ADDCNode, 0).use_empty()) { 441 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); 442 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); 443 } 444 if (!SDValue(ADDENode, 0).use_empty()) { 445 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); 446 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); 447 } 448 449 return true; 450 } 451 452 // selectMSUB - 453 // Transforms a subgraph in CurDAG if the following pattern is found: 454 // (addc Lo0, multLo), (sube Hi0, multHi), 455 // where, 456 // multHi/Lo: product of multiplication 457 // Lo0: initial value of Lo register 458 // Hi0: initial value of Hi register 459 // Return true if pattern matching was successful. 460 static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { 461 // SUBENode's second operand must be a flag output of an SUBC node in order 462 // for the matching to be successful. 463 SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); 464 465 if (SUBCNode->getOpcode() != ISD::SUBC) 466 return false; 467 468 SDValue MultHi = SUBENode->getOperand(1); 469 SDValue MultLo = SUBCNode->getOperand(1); 470 SDNode *MultNode = MultHi.getNode(); 471 unsigned MultOpc = MultHi.getOpcode(); 472 473 // MultHi and MultLo must be generated by the same node, 474 if (MultLo.getNode() != MultNode) 475 return false; 476 477 // and it must be a multiplication. 478 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 479 return false; 480 481 // MultLo amd MultHi must be the first and second output of MultNode 482 // respectively. 
483 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 484 return false; 485 486 // Transform this to a MSUB only if SUBENode and SUBCNode are the only users 487 // of the values of MultNode, in which case MultNode will be removed in later 488 // phases. 489 // If there exist users other than SUBENode or SUBCNode, this function returns 490 // here, which will result in MultNode being mapped to a single MULT 491 // instruction node rather than a pair of MULT and MSUB instructions being 492 // produced. 493 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 494 return false; 495 496 SDLoc DL(SUBENode); 497 498 // Initialize accumulator. 499 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 500 SUBCNode->getOperand(0), 501 SUBENode->getOperand(0)); 502 503 // create MipsSub(u) node 504 MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; 505 506 SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, 507 MultNode->getOperand(0),// Factor 0 508 MultNode->getOperand(1),// Factor 1 509 ACCIn); 510 511 // replace uses of sube and subc here 512 if (!SDValue(SUBCNode, 0).use_empty()) { 513 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); 514 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); 515 } 516 if (!SDValue(SUBENode, 0).use_empty()) { 517 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); 518 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); 519 } 520 521 return true; 522 } 523 524 static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, 525 TargetLowering::DAGCombinerInfo &DCI, 526 const MipsSubtarget *Subtarget) { 527 if (DCI.isBeforeLegalize()) 528 return SDValue(); 529 530 if (Subtarget->hasMips32() && !Subtarget->hasMips32r6() && 531 N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) 532 return SDValue(N, 0); 533 534 return SDValue(); 535 } 536 537 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 538 // 539 // Performs the following 
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  if (!Subtarget->hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    // Mask must be 2^n - 1, i.e. a contiguous run of low set bits.
    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    // The AND subsumes the extract's extension: rewrite the extract in place
    // as a zero-extending one and drop the AND.
    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
                      Op0->getVTList(),
                      makeArrayRef(Ops, Op0->getNumOperands()));
      return Op0;
    }
  }

  return SDValue();
}

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is a ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Minimum splat element size is 8 bits; the last argument selects the
  // byte order used to interpret the splat.
  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnesValue();

  return false;
}

// Test whether N is the bitwise inverse of OfNode, i.e. (xor OfNode, -1)
// in either operand order.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
655 // 656 // Performs the following transformations: 657 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 658 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 659 // vector type. 660 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 661 TargetLowering::DAGCombinerInfo &DCI, 662 const MipsSubtarget *Subtarget) { 663 if (!Subtarget->hasMSA()) 664 return SDValue(); 665 666 EVT Ty = N->getValueType(0); 667 668 if (!Ty.is128BitVector()) 669 return SDValue(); 670 671 SDValue Op0 = N->getOperand(0); 672 SDValue Op1 = N->getOperand(1); 673 674 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 675 SDValue Op0Op0 = Op0->getOperand(0); 676 SDValue Op0Op1 = Op0->getOperand(1); 677 SDValue Op1Op0 = Op1->getOperand(0); 678 SDValue Op1Op1 = Op1->getOperand(1); 679 bool IsLittleEndian = !Subtarget->isLittle(); 680 681 SDValue IfSet, IfClr, Cond; 682 bool IsConstantMask = false; 683 APInt Mask, InvMask; 684 685 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 686 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 687 // looking. 688 // IfClr will be set if we find a valid match. 689 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 690 Cond = Op0Op0; 691 IfSet = Op0Op1; 692 693 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 694 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 695 IfClr = Op1Op1; 696 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 697 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 698 IfClr = Op1Op0; 699 700 IsConstantMask = true; 701 } 702 703 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 704 // thing again using this mask. 705 // IfClr will be set if we find a valid match. 
706 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 707 Cond = Op0Op1; 708 IfSet = Op0Op0; 709 710 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 711 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 712 IfClr = Op1Op1; 713 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 714 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 715 IfClr = Op1Op0; 716 717 IsConstantMask = true; 718 } 719 720 // If IfClr is not yet set, try looking for a non-constant match. 721 // IfClr will be set if we find a valid match amongst the eight 722 // possibilities. 723 if (!IfClr.getNode()) { 724 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 725 Cond = Op1Op0; 726 IfSet = Op1Op1; 727 IfClr = Op0Op1; 728 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 729 Cond = Op1Op0; 730 IfSet = Op1Op1; 731 IfClr = Op0Op0; 732 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 733 Cond = Op1Op1; 734 IfSet = Op1Op0; 735 IfClr = Op0Op1; 736 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 737 Cond = Op1Op1; 738 IfSet = Op1Op0; 739 IfClr = Op0Op0; 740 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 741 Cond = Op0Op0; 742 IfSet = Op0Op1; 743 IfClr = Op1Op1; 744 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 745 Cond = Op0Op0; 746 IfSet = Op0Op1; 747 IfClr = Op1Op0; 748 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 749 Cond = Op0Op1; 750 IfSet = Op0Op0; 751 IfClr = Op1Op1; 752 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 753 Cond = Op0Op1; 754 IfSet = Op0Op0; 755 IfClr = Op1Op0; 756 } 757 } 758 759 // At this point, IfClr will be set if we have a valid match. 760 if (!IfClr.getNode()) 761 return SDValue(); 762 763 assert(Cond.getNode() && IfSet.getNode()); 764 765 // Fold degenerate cases. 766 if (IsConstantMask) { 767 if (Mask.isAllOnesValue()) 768 return IfSet; 769 else if (Mask == 0) 770 return IfClr; 771 } 772 773 // Transform the DAG into an equivalent VSELECT. 
774 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 775 } 776 777 return SDValue(); 778 } 779 780 static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, 781 TargetLowering::DAGCombinerInfo &DCI, 782 const MipsSubtarget *Subtarget) { 783 if (DCI.isBeforeLegalize()) 784 return SDValue(); 785 786 if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && 787 selectMSUB(N, &DAG)) 788 return SDValue(N, 0); 789 790 return SDValue(); 791 } 792 793 static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT, 794 EVT ShiftTy, SelectionDAG &DAG) { 795 // Clear the upper (64 - VT.sizeInBits) bits. 796 C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits()); 797 798 // Return 0. 799 if (C == 0) 800 return DAG.getConstant(0, VT); 801 802 // Return x. 803 if (C == 1) 804 return X; 805 806 // If c is power of 2, return (shl x, log2(c)). 807 if (isPowerOf2_64(C)) 808 return DAG.getNode(ISD::SHL, DL, VT, X, 809 DAG.getConstant(Log2_64(C), ShiftTy)); 810 811 unsigned Log2Ceil = Log2_64_Ceil(C); 812 uint64_t Floor = 1LL << Log2_64(C); 813 uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil; 814 815 // If |c - floor_c| <= |c - ceil_c|, 816 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 817 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 818 if (C - Floor <= Ceil - C) { 819 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 820 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 821 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 822 } 823 824 // If |c - floor_c| > |c - ceil_c|, 825 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
826 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 827 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 828 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 829 } 830 831 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 832 const TargetLowering::DAGCombinerInfo &DCI, 833 const MipsSETargetLowering *TL) { 834 EVT VT = N->getValueType(0); 835 836 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 837 if (!VT.isVector()) 838 return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), 839 VT, TL->getScalarShiftAmountTy(VT), DAG); 840 841 return SDValue(N, 0); 842 } 843 844 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 845 SelectionDAG &DAG, 846 const MipsSubtarget *Subtarget) { 847 // See if this is a vector splat immediate node. 848 APInt SplatValue, SplatUndef; 849 unsigned SplatBitSize; 850 bool HasAnyUndefs; 851 unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); 852 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 853 854 if (!Subtarget->hasDSP()) 855 return SDValue(); 856 857 if (!BV || 858 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 859 EltSize, !Subtarget->isLittle()) || 860 (SplatBitSize != EltSize) || 861 (SplatValue.getZExtValue() >= EltSize)) 862 return SDValue(); 863 864 return DAG.getNode(Opc, SDLoc(N), Ty, N->getOperand(0), 865 DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); 866 } 867 868 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 869 TargetLowering::DAGCombinerInfo &DCI, 870 const MipsSubtarget *Subtarget) { 871 EVT Ty = N->getValueType(0); 872 873 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 874 return SDValue(); 875 876 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 877 } 878 879 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 880 // constant splats into MipsISD::SHRA_DSP for DSPr2. 
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget->hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      // The shift amount must be a constant for the fold to be computable.
      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      // The extend operand (operand 2) records the width of the original
      // per-element extension performed by the extract node.
      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        // Mutate the extract node in place into the sign-extending variant;
        // the shl/sra pair becomes redundant and is replaced by it.
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
                        Op0Op0->getVTList(),
                        makeArrayRef(Ops, Op0Op0->getNumOperands()));
        return Op0Op0;
      }
    }
  }

  // DSP path: v2i16 needs DSP, v4i8 additionally requires DSPr2.
  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG,
                                Subtarget);
}


// Fold (srl vec, splat-imm) into MipsISD::SHRL_DSP. Note the asymmetry with
// performSRACombine: here v2i16 requires DSPr2 while v4i8 only needs DSP.
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

// Returns true if the condition code can be carried by a DSP compare for the
// given vector type: EQ/NE work for both DSP vector types, signed orderings
// only for v2i16, and unsigned orderings only for v4i8.
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}

// Lower a DSP-legal vector setcc into MipsISD::SETCC_DSP, preserving all
// three operands (LHS, RHS, condition code).
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

// For 128-bit integer vectors, fold (vselect (setcc ...), x, y) patterns
// into min/max nodes; for the DSP vector types, fold a vselect of a
// SETCC_DSP into MipsISD::SELECT_CC_DSP.
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b)
    //   (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b)
    //   (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b)
    //   (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b)
    //   (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b)
    //   (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b)
    //   (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b)
    //   (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b)
    // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but
    // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the
    // legalizer.
    SDValue Op0 = N->getOperand(0);

    if (Op0->getOpcode() != ISD::SETCC)
      return SDValue();

    ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get();
    bool Signed;

    if (CondCode == ISD::SETLT || CondCode == ISD::SETLE)
      Signed = true;
    else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE)
      Signed = false;
    else
      return SDValue();

    SDValue Op1 = N->getOperand(1);
    SDValue Op2 = N->getOperand(2);
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);

    // The select operands must be exactly the compared values (in either
    // order) for the min/max rewrite to be valid.
    if (Op1 == Op0Op0 && Op2 == Op0Op1)
      return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N),
                         Ty, Op1, Op2);
    else if (Op1 == Op0Op1 && Op2 == Op0Op0)
      return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N),
                         Ty, Op1, Op2);
  } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}

// Fold (xor (or $a, $b), all-ones) into MipsISD::VNOR for 128-bit integer
// vectors when MSA is available. The all-ones operand may be on either side.
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget *Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}

// Dispatch the target-specific DAG combines implemented above. Combines that
// assign to Val fall through to a debug dump before returning; the others
// return their result directly. When no combine produced a value, defer to
// the MipsTargetLowering implementation.
SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::ADDE:
    return performADDECombine(N, DAG, DCI, Subtarget);
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SUBE:
    return performSUBECombine(N, DAG, DCI, Subtarget);
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this);
  case ISD::SHL:
    return performSHLCombine(N, DAG, DCI, Subtarget);
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
          N->printrWithDepth(dbgs(), &DAG);
          dbgs() << "\n=> \n";
          Val.getNode()->printrWithDepth(dbgs(), &DAG);
          dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

// Expand pseudo-instructions that require custom MachineBasicBlock
// insertion: the DSP BPOSGE32 branch, the MSA set-on-(non)zero branch
// pseudos (SNZ_*/SZ_* map to BNZ_*/BZ_*), and the floating-point
// copy/insert/fill/fexp2 pseudos. Anything else goes to the base class.
MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI->getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  // Variable-index inserts: the second argument is the element size in
  // bytes, the third whether the element is floating point.
  case Mips::INSERT_B_VIDX_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  }
}

// Decide whether a call may be emitted as a tail call. Gated by the
// -enable-mips-tail-calls flag (off by default).
bool MipsSETargetLowering::
isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
                                  unsigned NextStackOffset,
                                  const MipsFunctionInfo& FI) const {
  if (!EnableMipsTailCalls)
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}

// Build the operand list for a call node. Unlike the base class, the callee
// is placed first in the list before delegating the rest of the work.
void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, CLI, Callee, Chain);
}

// Custom-lower f64 loads when -mno-ldc1-sdc1 is in effect: split the load
// into two i32 loads and recombine with MipsISD::BuildPairF64. All other
// loads go through the base class.
SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
                           MachinePointerInfo(), Nd.isVolatile(),
                           Nd.isNonTemporal(), Nd.isInvariant(),
                           Nd.getAlignment());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
  // The second load is chained after the first; its alignment can be at
  // most 4 since it sits 4 bytes past the original (possibly 8-aligned)
  // address.
  SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
                           MachinePointerInfo(), Nd.isVolatile(),
                           Nd.isNonTemporal(), Nd.isInvariant(),
                           std::min(Nd.getAlignment(), 4U));

  // On big-endian targets the high word comes first in memory.
  if (!Subtarget->isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}

// Custom-lower f64 stores when -mno-ldc1-sdc1 is in effect: extract the two
// halves with MipsISD::ExtractElementF64 and emit two chained i32 stores.
// All other stores go through the base class.
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, MVT::i32));

  // On big-endian targets the high word is stored first.
  if (!Subtarget->isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
                       Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
                       Nd.getTBAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      Nd.isVolatile(), Nd.isNonTemporal(),
                      std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
}

// Lower a multiply/divide-like node into an accumulator-based sequence:
// NewOpc produces an untyped (LO/HI accumulator) result, from which the
// requested LO and/or HI halves are read back with MFLO/MFHI.
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget->hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  // If only one half was requested, return it directly; otherwise merge
  // both halves into a two-result node.
  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}


// Split a 64-bit value into its two 32-bit halves and move them into the
// LO/HI accumulator pair (MTLOHI), yielding an untyped accumulator value.
static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) {
  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(0, MVT::i32));
  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(1, MVT::i32));
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

// Read the LO/HI accumulator pair back out of an untyped value and rebuild
// the i64 result with BUILD_PAIR (LO is the low word).
static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64. An i64 operand is routed through
  // the accumulator instead of being passed as a regular operand.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output. Any i64 result becomes Untyped (the accumulator) and is
  // converted back with extractLOHI below.
  SmallVector<EVT, 2> ResTys;

  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
       I != E; ++I)
    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  // Re-attach the output chain alongside the (possibly rebuilt) result.
  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}

// Lower an MSA copy intrinsic into the specified SelectionDAG node
// (VEXTRACT_SEXT_ELT or VEXTRACT_ZEXT_ELT); the element type is recorded as
// a VT operand so the extension width is preserved.
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

// Build a splat of operand 2 of the intrinsic as a BUILD_VECTOR. v2i64 is
// built via v4i32 (with zero upper words) and bitcast back.
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA;
  SDValue LaneB = Op->getOperand(2);

  if (ResVecTy == MVT::v2i64) {
    // Zero upper word per 64-bit lane; build via v4i32.
    LaneA = DAG.getConstant(0, MVT::i32);
    ViaVecTy = MVT::v4i32;
  } else
    LaneA = LaneB;

  // 16 entries covers the widest case (v16i8); only the first
  // getVectorNumElements() entries are used below.
  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy,
                       makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);

  return Result;
}

// Splat the constant found at intrinsic operand ImmOp across the result
// vector type.
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
  return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0));
}

// Build a BUILD_VECTOR splat of SplatValue for VecTy. For v2i64 the value is
// split into two i32 halves and the splat is built via v4i32, honouring
// endianness, then bitcast back.
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  // 16 entries covers the widest case (v16i8); only the first
  // getVectorNumElements() entries are used below.
  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy,
                       makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}

// Lower a bit-immediate MSA intrinsic (bseti/bnegi-style) into
// (Opc operand1, 2^Imm): the power-of-two mask is constant folded when the
// bit index is a compile-time constant, otherwise it is materialized as a
// splat shifted left at runtime.
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
  // here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm =
          DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp,
                                  BitImmHiOp, BitImmLoOp, BitImmHiOp));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldnt constant fold, do a vector shift instead

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    // 2^Imm computed as (1 << Imm) per element.
    Exp2Imm =
        DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}

// Lower an MSA bclr intrinsic with a variable bit index:
// result = operand1 & ~(1 << operand2), per element.
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

// Lower an MSA bclri intrinsic (constant bit index): the ~(1 << imm) mask is
// folded to a constant splat at compile time.
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
  SDValue BitMask = DAG.getConstant(~BitImm, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}

// Lower chainless MIPS intrinsics (operand 0 is the intrinsic ID) into
// generic ISD nodes or Mips-specific nodes. Unknown intrinsics return an
// empty SDValue and are left to default handling.
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);

  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
  default:
    return SDValue();
  case Intrinsic::mips_shilo:
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
  case Intrinsic::mips_dpau_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
  case Intrinsic::mips_dpau_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
  case Intrinsic::mips_dpsu_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
  case Intrinsic::mips_dpsu_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
  case Intrinsic::mips_dpa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
  case Intrinsic::mips_dps_w_ph:
    return lowerDSPIntr(Op, DAG,
MipsISD::DPS_W_PH); 1522 case Intrinsic::mips_dpax_w_ph: 1523 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1524 case Intrinsic::mips_dpsx_w_ph: 1525 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1526 case Intrinsic::mips_mulsa_w_ph: 1527 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1528 case Intrinsic::mips_mult: 1529 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1530 case Intrinsic::mips_multu: 1531 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1532 case Intrinsic::mips_madd: 1533 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1534 case Intrinsic::mips_maddu: 1535 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1536 case Intrinsic::mips_msub: 1537 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1538 case Intrinsic::mips_msubu: 1539 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1540 case Intrinsic::mips_addv_b: 1541 case Intrinsic::mips_addv_h: 1542 case Intrinsic::mips_addv_w: 1543 case Intrinsic::mips_addv_d: 1544 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1545 Op->getOperand(2)); 1546 case Intrinsic::mips_addvi_b: 1547 case Intrinsic::mips_addvi_h: 1548 case Intrinsic::mips_addvi_w: 1549 case Intrinsic::mips_addvi_d: 1550 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1551 lowerMSASplatImm(Op, 2, DAG)); 1552 case Intrinsic::mips_and_v: 1553 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1554 Op->getOperand(2)); 1555 case Intrinsic::mips_andi_b: 1556 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1557 lowerMSASplatImm(Op, 2, DAG)); 1558 case Intrinsic::mips_bclr_b: 1559 case Intrinsic::mips_bclr_h: 1560 case Intrinsic::mips_bclr_w: 1561 case Intrinsic::mips_bclr_d: 1562 return lowerMSABitClear(Op, DAG); 1563 case Intrinsic::mips_bclri_b: 1564 case Intrinsic::mips_bclri_h: 1565 case Intrinsic::mips_bclri_w: 1566 case Intrinsic::mips_bclri_d: 1567 return lowerMSABitClearImm(Op, DAG); 1568 case Intrinsic::mips_binsli_b: 1569 case 
Intrinsic::mips_binsli_h: 1570 case Intrinsic::mips_binsli_w: 1571 case Intrinsic::mips_binsli_d: { 1572 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1573 EVT VecTy = Op->getValueType(0); 1574 EVT EltTy = VecTy.getVectorElementType(); 1575 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1576 Op->getConstantOperandVal(3)); 1577 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1578 DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), 1579 Op->getOperand(1)); 1580 } 1581 case Intrinsic::mips_binsri_b: 1582 case Intrinsic::mips_binsri_h: 1583 case Intrinsic::mips_binsri_w: 1584 case Intrinsic::mips_binsri_d: { 1585 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1586 EVT VecTy = Op->getValueType(0); 1587 EVT EltTy = VecTy.getVectorElementType(); 1588 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1589 Op->getConstantOperandVal(3)); 1590 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1591 DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), 1592 Op->getOperand(1)); 1593 } 1594 case Intrinsic::mips_bmnz_v: 1595 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1596 Op->getOperand(2), Op->getOperand(1)); 1597 case Intrinsic::mips_bmnzi_b: 1598 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1599 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1600 Op->getOperand(1)); 1601 case Intrinsic::mips_bmz_v: 1602 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1603 Op->getOperand(1), Op->getOperand(2)); 1604 case Intrinsic::mips_bmzi_b: 1605 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1606 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1607 Op->getOperand(2)); 1608 case Intrinsic::mips_bneg_b: 1609 case Intrinsic::mips_bneg_h: 1610 case Intrinsic::mips_bneg_w: 1611 case Intrinsic::mips_bneg_d: { 1612 EVT VecTy = Op->getValueType(0); 1613 SDValue One = DAG.getConstant(1, VecTy); 1614 1615 return DAG.getNode(ISD::XOR, DL, VecTy, 
Op->getOperand(1), 1616 DAG.getNode(ISD::SHL, DL, VecTy, One, 1617 Op->getOperand(2))); 1618 } 1619 case Intrinsic::mips_bnegi_b: 1620 case Intrinsic::mips_bnegi_h: 1621 case Intrinsic::mips_bnegi_w: 1622 case Intrinsic::mips_bnegi_d: 1623 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1624 !Subtarget->isLittle()); 1625 case Intrinsic::mips_bnz_b: 1626 case Intrinsic::mips_bnz_h: 1627 case Intrinsic::mips_bnz_w: 1628 case Intrinsic::mips_bnz_d: 1629 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1630 Op->getOperand(1)); 1631 case Intrinsic::mips_bnz_v: 1632 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1633 Op->getOperand(1)); 1634 case Intrinsic::mips_bsel_v: 1635 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1636 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1637 Op->getOperand(1), Op->getOperand(3), 1638 Op->getOperand(2)); 1639 case Intrinsic::mips_bseli_b: 1640 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1641 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1642 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1643 Op->getOperand(2)); 1644 case Intrinsic::mips_bset_b: 1645 case Intrinsic::mips_bset_h: 1646 case Intrinsic::mips_bset_w: 1647 case Intrinsic::mips_bset_d: { 1648 EVT VecTy = Op->getValueType(0); 1649 SDValue One = DAG.getConstant(1, VecTy); 1650 1651 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1652 DAG.getNode(ISD::SHL, DL, VecTy, One, 1653 Op->getOperand(2))); 1654 } 1655 case Intrinsic::mips_bseti_b: 1656 case Intrinsic::mips_bseti_h: 1657 case Intrinsic::mips_bseti_w: 1658 case Intrinsic::mips_bseti_d: 1659 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1660 !Subtarget->isLittle()); 1661 case Intrinsic::mips_bz_b: 1662 case Intrinsic::mips_bz_h: 1663 case Intrinsic::mips_bz_w: 1664 case Intrinsic::mips_bz_d: 1665 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1666 
Op->getOperand(1)); 1667 case Intrinsic::mips_bz_v: 1668 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1669 Op->getOperand(1)); 1670 case Intrinsic::mips_ceq_b: 1671 case Intrinsic::mips_ceq_h: 1672 case Intrinsic::mips_ceq_w: 1673 case Intrinsic::mips_ceq_d: 1674 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1675 Op->getOperand(2), ISD::SETEQ); 1676 case Intrinsic::mips_ceqi_b: 1677 case Intrinsic::mips_ceqi_h: 1678 case Intrinsic::mips_ceqi_w: 1679 case Intrinsic::mips_ceqi_d: 1680 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1681 lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ); 1682 case Intrinsic::mips_cle_s_b: 1683 case Intrinsic::mips_cle_s_h: 1684 case Intrinsic::mips_cle_s_w: 1685 case Intrinsic::mips_cle_s_d: 1686 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1687 Op->getOperand(2), ISD::SETLE); 1688 case Intrinsic::mips_clei_s_b: 1689 case Intrinsic::mips_clei_s_h: 1690 case Intrinsic::mips_clei_s_w: 1691 case Intrinsic::mips_clei_s_d: 1692 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1693 lowerMSASplatImm(Op, 2, DAG), ISD::SETLE); 1694 case Intrinsic::mips_cle_u_b: 1695 case Intrinsic::mips_cle_u_h: 1696 case Intrinsic::mips_cle_u_w: 1697 case Intrinsic::mips_cle_u_d: 1698 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1699 Op->getOperand(2), ISD::SETULE); 1700 case Intrinsic::mips_clei_u_b: 1701 case Intrinsic::mips_clei_u_h: 1702 case Intrinsic::mips_clei_u_w: 1703 case Intrinsic::mips_clei_u_d: 1704 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1705 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1706 case Intrinsic::mips_clt_s_b: 1707 case Intrinsic::mips_clt_s_h: 1708 case Intrinsic::mips_clt_s_w: 1709 case Intrinsic::mips_clt_s_d: 1710 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1711 Op->getOperand(2), ISD::SETLT); 1712 case Intrinsic::mips_clti_s_b: 1713 case Intrinsic::mips_clti_s_h: 1714 case 
Intrinsic::mips_clti_s_w: 1715 case Intrinsic::mips_clti_s_d: 1716 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1717 lowerMSASplatImm(Op, 2, DAG), ISD::SETLT); 1718 case Intrinsic::mips_clt_u_b: 1719 case Intrinsic::mips_clt_u_h: 1720 case Intrinsic::mips_clt_u_w: 1721 case Intrinsic::mips_clt_u_d: 1722 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1723 Op->getOperand(2), ISD::SETULT); 1724 case Intrinsic::mips_clti_u_b: 1725 case Intrinsic::mips_clti_u_h: 1726 case Intrinsic::mips_clti_u_w: 1727 case Intrinsic::mips_clti_u_d: 1728 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1729 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1730 case Intrinsic::mips_copy_s_b: 1731 case Intrinsic::mips_copy_s_h: 1732 case Intrinsic::mips_copy_s_w: 1733 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1734 case Intrinsic::mips_copy_s_d: 1735 if (Subtarget->hasMips64()) 1736 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1737 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1738 else { 1739 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1740 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1741 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1742 Op->getValueType(0), Op->getOperand(1), 1743 Op->getOperand(2)); 1744 } 1745 case Intrinsic::mips_copy_u_b: 1746 case Intrinsic::mips_copy_u_h: 1747 case Intrinsic::mips_copy_u_w: 1748 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1749 case Intrinsic::mips_copy_u_d: 1750 if (Subtarget->hasMips64()) 1751 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1752 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1753 else { 1754 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1755 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 
1756 // Note: When i64 is illegal, this results in copy_s.w instructions 1757 // instead of copy_u.w instructions. This makes no difference to the 1758 // behaviour since i64 is only illegal when the register file is 32-bit. 1759 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1760 Op->getValueType(0), Op->getOperand(1), 1761 Op->getOperand(2)); 1762 } 1763 case Intrinsic::mips_div_s_b: 1764 case Intrinsic::mips_div_s_h: 1765 case Intrinsic::mips_div_s_w: 1766 case Intrinsic::mips_div_s_d: 1767 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1768 Op->getOperand(2)); 1769 case Intrinsic::mips_div_u_b: 1770 case Intrinsic::mips_div_u_h: 1771 case Intrinsic::mips_div_u_w: 1772 case Intrinsic::mips_div_u_d: 1773 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1774 Op->getOperand(2)); 1775 case Intrinsic::mips_fadd_w: 1776 case Intrinsic::mips_fadd_d: 1777 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1778 Op->getOperand(2)); 1779 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1780 case Intrinsic::mips_fceq_w: 1781 case Intrinsic::mips_fceq_d: 1782 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1783 Op->getOperand(2), ISD::SETOEQ); 1784 case Intrinsic::mips_fcle_w: 1785 case Intrinsic::mips_fcle_d: 1786 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1787 Op->getOperand(2), ISD::SETOLE); 1788 case Intrinsic::mips_fclt_w: 1789 case Intrinsic::mips_fclt_d: 1790 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1791 Op->getOperand(2), ISD::SETOLT); 1792 case Intrinsic::mips_fcne_w: 1793 case Intrinsic::mips_fcne_d: 1794 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1795 Op->getOperand(2), ISD::SETONE); 1796 case Intrinsic::mips_fcor_w: 1797 case Intrinsic::mips_fcor_d: 1798 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1799 Op->getOperand(2), ISD::SETO); 1800 case 
Intrinsic::mips_fcueq_w: 1801 case Intrinsic::mips_fcueq_d: 1802 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1803 Op->getOperand(2), ISD::SETUEQ); 1804 case Intrinsic::mips_fcule_w: 1805 case Intrinsic::mips_fcule_d: 1806 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1807 Op->getOperand(2), ISD::SETULE); 1808 case Intrinsic::mips_fcult_w: 1809 case Intrinsic::mips_fcult_d: 1810 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1811 Op->getOperand(2), ISD::SETULT); 1812 case Intrinsic::mips_fcun_w: 1813 case Intrinsic::mips_fcun_d: 1814 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1815 Op->getOperand(2), ISD::SETUO); 1816 case Intrinsic::mips_fcune_w: 1817 case Intrinsic::mips_fcune_d: 1818 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1819 Op->getOperand(2), ISD::SETUNE); 1820 case Intrinsic::mips_fdiv_w: 1821 case Intrinsic::mips_fdiv_d: 1822 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1823 Op->getOperand(2)); 1824 case Intrinsic::mips_ffint_u_w: 1825 case Intrinsic::mips_ffint_u_d: 1826 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1827 Op->getOperand(1)); 1828 case Intrinsic::mips_ffint_s_w: 1829 case Intrinsic::mips_ffint_s_d: 1830 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1831 Op->getOperand(1)); 1832 case Intrinsic::mips_fill_b: 1833 case Intrinsic::mips_fill_h: 1834 case Intrinsic::mips_fill_w: 1835 case Intrinsic::mips_fill_d: { 1836 SmallVector<SDValue, 16> Ops; 1837 EVT ResTy = Op->getValueType(0); 1838 1839 for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) 1840 Ops.push_back(Op->getOperand(1)); 1841 1842 // If ResTy is v2i64 then the type legalizer will break this node down into 1843 // an equivalent v4i32. 
1844 return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, Ops); 1845 } 1846 case Intrinsic::mips_fexp2_w: 1847 case Intrinsic::mips_fexp2_d: { 1848 EVT ResTy = Op->getValueType(0); 1849 return DAG.getNode( 1850 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1851 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1852 } 1853 case Intrinsic::mips_flog2_w: 1854 case Intrinsic::mips_flog2_d: 1855 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1856 case Intrinsic::mips_fmadd_w: 1857 case Intrinsic::mips_fmadd_d: 1858 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1859 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1860 case Intrinsic::mips_fmul_w: 1861 case Intrinsic::mips_fmul_d: 1862 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1863 Op->getOperand(2)); 1864 case Intrinsic::mips_fmsub_w: 1865 case Intrinsic::mips_fmsub_d: { 1866 EVT ResTy = Op->getValueType(0); 1867 return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), 1868 DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, 1869 Op->getOperand(2), Op->getOperand(3))); 1870 } 1871 case Intrinsic::mips_frint_w: 1872 case Intrinsic::mips_frint_d: 1873 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1874 case Intrinsic::mips_fsqrt_w: 1875 case Intrinsic::mips_fsqrt_d: 1876 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1877 case Intrinsic::mips_fsub_w: 1878 case Intrinsic::mips_fsub_d: 1879 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1880 Op->getOperand(2)); 1881 case Intrinsic::mips_ftrunc_u_w: 1882 case Intrinsic::mips_ftrunc_u_d: 1883 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1884 Op->getOperand(1)); 1885 case Intrinsic::mips_ftrunc_s_w: 1886 case Intrinsic::mips_ftrunc_s_d: 1887 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1888 Op->getOperand(1)); 1889 case Intrinsic::mips_ilvev_b: 1890 case 
Intrinsic::mips_ilvev_h: 1891 case Intrinsic::mips_ilvev_w: 1892 case Intrinsic::mips_ilvev_d: 1893 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1894 Op->getOperand(1), Op->getOperand(2)); 1895 case Intrinsic::mips_ilvl_b: 1896 case Intrinsic::mips_ilvl_h: 1897 case Intrinsic::mips_ilvl_w: 1898 case Intrinsic::mips_ilvl_d: 1899 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1900 Op->getOperand(1), Op->getOperand(2)); 1901 case Intrinsic::mips_ilvod_b: 1902 case Intrinsic::mips_ilvod_h: 1903 case Intrinsic::mips_ilvod_w: 1904 case Intrinsic::mips_ilvod_d: 1905 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1906 Op->getOperand(1), Op->getOperand(2)); 1907 case Intrinsic::mips_ilvr_b: 1908 case Intrinsic::mips_ilvr_h: 1909 case Intrinsic::mips_ilvr_w: 1910 case Intrinsic::mips_ilvr_d: 1911 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1912 Op->getOperand(1), Op->getOperand(2)); 1913 case Intrinsic::mips_insert_b: 1914 case Intrinsic::mips_insert_h: 1915 case Intrinsic::mips_insert_w: 1916 case Intrinsic::mips_insert_d: 1917 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1918 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1919 case Intrinsic::mips_insve_b: 1920 case Intrinsic::mips_insve_h: 1921 case Intrinsic::mips_insve_w: 1922 case Intrinsic::mips_insve_d: 1923 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1924 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1925 DAG.getConstant(0, MVT::i32)); 1926 case Intrinsic::mips_ldi_b: 1927 case Intrinsic::mips_ldi_h: 1928 case Intrinsic::mips_ldi_w: 1929 case Intrinsic::mips_ldi_d: 1930 return lowerMSASplatImm(Op, 1, DAG); 1931 case Intrinsic::mips_lsa: 1932 case Intrinsic::mips_dlsa: { 1933 EVT ResTy = Op->getValueType(0); 1934 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1935 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1936 Op->getOperand(2), Op->getOperand(3))); 1937 } 1938 case 
Intrinsic::mips_maddv_b: 1939 case Intrinsic::mips_maddv_h: 1940 case Intrinsic::mips_maddv_w: 1941 case Intrinsic::mips_maddv_d: { 1942 EVT ResTy = Op->getValueType(0); 1943 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1944 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1945 Op->getOperand(2), Op->getOperand(3))); 1946 } 1947 case Intrinsic::mips_max_s_b: 1948 case Intrinsic::mips_max_s_h: 1949 case Intrinsic::mips_max_s_w: 1950 case Intrinsic::mips_max_s_d: 1951 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1952 Op->getOperand(1), Op->getOperand(2)); 1953 case Intrinsic::mips_max_u_b: 1954 case Intrinsic::mips_max_u_h: 1955 case Intrinsic::mips_max_u_w: 1956 case Intrinsic::mips_max_u_d: 1957 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1958 Op->getOperand(1), Op->getOperand(2)); 1959 case Intrinsic::mips_maxi_s_b: 1960 case Intrinsic::mips_maxi_s_h: 1961 case Intrinsic::mips_maxi_s_w: 1962 case Intrinsic::mips_maxi_s_d: 1963 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1964 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1965 case Intrinsic::mips_maxi_u_b: 1966 case Intrinsic::mips_maxi_u_h: 1967 case Intrinsic::mips_maxi_u_w: 1968 case Intrinsic::mips_maxi_u_d: 1969 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1970 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1971 case Intrinsic::mips_min_s_b: 1972 case Intrinsic::mips_min_s_h: 1973 case Intrinsic::mips_min_s_w: 1974 case Intrinsic::mips_min_s_d: 1975 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1976 Op->getOperand(1), Op->getOperand(2)); 1977 case Intrinsic::mips_min_u_b: 1978 case Intrinsic::mips_min_u_h: 1979 case Intrinsic::mips_min_u_w: 1980 case Intrinsic::mips_min_u_d: 1981 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1982 Op->getOperand(1), Op->getOperand(2)); 1983 case Intrinsic::mips_mini_s_b: 1984 case Intrinsic::mips_mini_s_h: 1985 case Intrinsic::mips_mini_s_w: 1986 case 
Intrinsic::mips_mini_s_d: 1987 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1988 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1989 case Intrinsic::mips_mini_u_b: 1990 case Intrinsic::mips_mini_u_h: 1991 case Intrinsic::mips_mini_u_w: 1992 case Intrinsic::mips_mini_u_d: 1993 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1994 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1995 case Intrinsic::mips_mod_s_b: 1996 case Intrinsic::mips_mod_s_h: 1997 case Intrinsic::mips_mod_s_w: 1998 case Intrinsic::mips_mod_s_d: 1999 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2000 Op->getOperand(2)); 2001 case Intrinsic::mips_mod_u_b: 2002 case Intrinsic::mips_mod_u_h: 2003 case Intrinsic::mips_mod_u_w: 2004 case Intrinsic::mips_mod_u_d: 2005 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2006 Op->getOperand(2)); 2007 case Intrinsic::mips_mulv_b: 2008 case Intrinsic::mips_mulv_h: 2009 case Intrinsic::mips_mulv_w: 2010 case Intrinsic::mips_mulv_d: 2011 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2012 Op->getOperand(2)); 2013 case Intrinsic::mips_msubv_b: 2014 case Intrinsic::mips_msubv_h: 2015 case Intrinsic::mips_msubv_w: 2016 case Intrinsic::mips_msubv_d: { 2017 EVT ResTy = Op->getValueType(0); 2018 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2019 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2020 Op->getOperand(2), Op->getOperand(3))); 2021 } 2022 case Intrinsic::mips_nlzc_b: 2023 case Intrinsic::mips_nlzc_h: 2024 case Intrinsic::mips_nlzc_w: 2025 case Intrinsic::mips_nlzc_d: 2026 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2027 case Intrinsic::mips_nor_v: { 2028 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2029 Op->getOperand(1), Op->getOperand(2)); 2030 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2031 } 2032 case Intrinsic::mips_nori_b: { 2033 SDValue Res = DAG.getNode(ISD::OR, DL, 
Op->getValueType(0), 2034 Op->getOperand(1), 2035 lowerMSASplatImm(Op, 2, DAG)); 2036 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2037 } 2038 case Intrinsic::mips_or_v: 2039 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 2040 Op->getOperand(2)); 2041 case Intrinsic::mips_ori_b: 2042 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2043 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2044 case Intrinsic::mips_pckev_b: 2045 case Intrinsic::mips_pckev_h: 2046 case Intrinsic::mips_pckev_w: 2047 case Intrinsic::mips_pckev_d: 2048 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2049 Op->getOperand(1), Op->getOperand(2)); 2050 case Intrinsic::mips_pckod_b: 2051 case Intrinsic::mips_pckod_h: 2052 case Intrinsic::mips_pckod_w: 2053 case Intrinsic::mips_pckod_d: 2054 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2055 Op->getOperand(1), Op->getOperand(2)); 2056 case Intrinsic::mips_pcnt_b: 2057 case Intrinsic::mips_pcnt_h: 2058 case Intrinsic::mips_pcnt_w: 2059 case Intrinsic::mips_pcnt_d: 2060 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2061 case Intrinsic::mips_shf_b: 2062 case Intrinsic::mips_shf_h: 2063 case Intrinsic::mips_shf_w: 2064 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2065 Op->getOperand(2), Op->getOperand(1)); 2066 case Intrinsic::mips_sll_b: 2067 case Intrinsic::mips_sll_h: 2068 case Intrinsic::mips_sll_w: 2069 case Intrinsic::mips_sll_d: 2070 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2071 Op->getOperand(2)); 2072 case Intrinsic::mips_slli_b: 2073 case Intrinsic::mips_slli_h: 2074 case Intrinsic::mips_slli_w: 2075 case Intrinsic::mips_slli_d: 2076 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2077 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2078 case Intrinsic::mips_splat_b: 2079 case Intrinsic::mips_splat_h: 2080 case Intrinsic::mips_splat_w: 2081 case Intrinsic::mips_splat_d: 2082 // We can't lower via 
VECTOR_SHUFFLE because it requires constant shuffle 2083 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2084 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2085 // Instead we lower to MipsISD::VSHF and match from there. 2086 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2087 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2088 Op->getOperand(1)); 2089 case Intrinsic::mips_splati_b: 2090 case Intrinsic::mips_splati_h: 2091 case Intrinsic::mips_splati_w: 2092 case Intrinsic::mips_splati_d: 2093 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2094 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2095 Op->getOperand(1)); 2096 case Intrinsic::mips_sra_b: 2097 case Intrinsic::mips_sra_h: 2098 case Intrinsic::mips_sra_w: 2099 case Intrinsic::mips_sra_d: 2100 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2101 Op->getOperand(2)); 2102 case Intrinsic::mips_srai_b: 2103 case Intrinsic::mips_srai_h: 2104 case Intrinsic::mips_srai_w: 2105 case Intrinsic::mips_srai_d: 2106 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2107 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2108 case Intrinsic::mips_srl_b: 2109 case Intrinsic::mips_srl_h: 2110 case Intrinsic::mips_srl_w: 2111 case Intrinsic::mips_srl_d: 2112 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2113 Op->getOperand(2)); 2114 case Intrinsic::mips_srli_b: 2115 case Intrinsic::mips_srli_h: 2116 case Intrinsic::mips_srli_w: 2117 case Intrinsic::mips_srli_d: 2118 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2119 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2120 case Intrinsic::mips_subv_b: 2121 case Intrinsic::mips_subv_h: 2122 case Intrinsic::mips_subv_w: 2123 case Intrinsic::mips_subv_d: 2124 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2125 Op->getOperand(2)); 2126 case Intrinsic::mips_subvi_b: 2127 case Intrinsic::mips_subvi_h: 2128 case Intrinsic::mips_subvi_w: 
  case Intrinsic::mips_subvi_d:
    // Immediate form: splat the immediate across the vector first.
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_vshf_b:
  case Intrinsic::mips_vshf_h:
  case Intrinsic::mips_vshf_w:
  case Intrinsic::mips_vshf_d:
    // Variable shuffle lowers to the target-specific VSHF node.
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_xor_v:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_xori_b:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  }
}

// Lower an MSA ld.[bhwd] intrinsic to an ordinary vector load from
// Address + Offset. Operand 0 of the intrinsic node is the chain, operand 2
// the base address and operand 3 the offset.
// NOTE(review): Intr is currently unused; the load width is implied by the
// intrinsic's result type.
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);

  // Fold the intrinsic's offset operand into the address.
  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  // Trailing args: not volatile, not non-temporal, not invariant;
  // the final 16 is presumably the alignment in this DAG API — TODO confirm
  // against this LLVM version's SelectionDAG::getLoad signature.
  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false,
                     false, false, 16);
}

// Lower intrinsics that carry a chain (operand 0) because they touch memory
// or the DSP accumulators. DSP intrinsics map 1:1 onto target-specific
// nodes via lowerDSPIntr; MSA loads go through lowerMSALoadIntr.
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // Operand 1 holds the intrinsic id as a constant.
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    // Not handled here: fall back to the default lowering.
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr);
  }
}

// Lower an MSA st.[bhwd] intrinsic to an ordinary vector store of Value to
// Address + Offset. Operand 0 is the chain, operand 2 the value, operand 3
// the base address and operand 4 the offset.
// NOTE(review): Intr is currently unused; the store width is implied by
// Value's type.
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);

  // Fold the intrinsic's offset operand into the address.
  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  // Trailing args: not volatile, not non-temporal; the final 16 is
  // presumably the alignment in this DAG API — TODO confirm against this
  // LLVM version's SelectionDAG::getStore signature.
  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false,
                      false, 16);
}

// Lower void intrinsics (chain in, no value out). Only the MSA stores are
// handled here; everything else uses the default lowering.
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Operand 1 holds the intrinsic id as a constant.
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
    return lowerMSAStoreIntr(Op, DAG, Intr);
  }
}

/// \brief Check if the given BuildVectorSDNode is a splat.
/// This method currently relies on DAG nodes being reused when equivalent,
/// so it's possible for this to return false even when isConstantSplat returns
/// true.
static bool isSplatVector(const BuildVectorSDNode *N) {
  unsigned int nOps = N->getNumOperands();
  assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");

  SDValue Operand0 = N->getOperand(0);

  // A splat reuses the identical SDValue in every lane, so comparing each
  // operand against lane 0 is sufficient.
  for (unsigned int i = 1; i < nOps; ++i) {
    if (N->getOperand(i) != Operand0)
      return false;
  }

  return true;
}

// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
2268 SDValue MipsSETargetLowering:: 2269 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2270 SDLoc DL(Op); 2271 EVT ResTy = Op->getValueType(0); 2272 SDValue Op0 = Op->getOperand(0); 2273 EVT VecTy = Op0->getValueType(0); 2274 2275 if (!VecTy.is128BitVector()) 2276 return SDValue(); 2277 2278 if (ResTy.isInteger()) { 2279 SDValue Op1 = Op->getOperand(1); 2280 EVT EltTy = VecTy.getVectorElementType(); 2281 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2282 DAG.getValueType(EltTy)); 2283 } 2284 2285 return Op; 2286 } 2287 2288 static bool isConstantOrUndef(const SDValue Op) { 2289 if (Op->getOpcode() == ISD::UNDEF) 2290 return true; 2291 if (dyn_cast<ConstantSDNode>(Op)) 2292 return true; 2293 if (dyn_cast<ConstantFPSDNode>(Op)) 2294 return true; 2295 return false; 2296 } 2297 2298 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2299 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2300 if (isConstantOrUndef(Op->getOperand(i))) 2301 return true; 2302 return false; 2303 } 2304 2305 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2306 // backend. 2307 // 2308 // Lowers according to the following rules: 2309 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2310 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2311 // immediate 2312 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2313 // is a power of 2 less than or equal to 64 and the value does not fit into a 2314 // signed 10-bit immediate 2315 // - Non-constant splats are legal as-is. 2316 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2317 // - All others are illegal and must be expanded. 
// See the rule list in the comment above: constant splats become ldi.[bhwd]
// immediates or bitcast integer BUILD_VECTORs, non-constant splats stay
// legal, non-constant non-splats become INSERT_VECTOR_ELT chains, and
// everything else is rejected (expanded by the legalizer).
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // This lowering only applies to 128-bit MSA vectors.
  if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  // MinSplatBits=8; on big-endian targets the splat must be found with the
  // operands in big-endian order.
  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget->isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value fits into a simm10 then we can use ldi.[bhwd]
    // However, if it isn't an integer type we will have to bitcast from an
    // integer type first. Also, if there are any undefs, we must lower them
    // to defined values first.
    if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10))
      return Op;

    // Otherwise materialize the splat in an equivalent integer vector type
    // and bitcast to the requested type.
    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);

    // Bitcast to the type we originally wanted
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (isSplatVector(Node))
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    // NOTE(review): this inner ResTy shadows the outer one; both come from
    // the same node so the value is identical.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    // Insert each operand into an UNDEF vector, one lane at a time.
    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, MVT::i32));
    }
    return Vector;
  }

  // Constant (or partially-constant) non-splat: let the legalizer expand it.
  return SDValue();
}

// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above form.
2404 // 2405 // For example: 2406 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2407 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2408 // i32 7, i32 6, i32 5, i32 4> 2409 // is lowered to: 2410 // (SHF_H $w0, $w1, 27) 2411 // where the 27 comes from: 2412 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2413 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2414 SmallVector<int, 16> Indices, 2415 SelectionDAG &DAG) { 2416 int SHFIndices[4] = { -1, -1, -1, -1 }; 2417 2418 if (Indices.size() < 4) 2419 return SDValue(); 2420 2421 for (unsigned i = 0; i < 4; ++i) { 2422 for (unsigned j = i; j < Indices.size(); j += 4) { 2423 int Idx = Indices[j]; 2424 2425 // Convert from vector index to 4-element subvector index 2426 // If an index refers to an element outside of the subvector then give up 2427 if (Idx != -1) { 2428 Idx -= 4 * (j / 4); 2429 if (Idx < 0 || Idx >= 4) 2430 return SDValue(); 2431 } 2432 2433 // If the mask has an undef, replace it with the current index. 2434 // Note that it might still be undef if the current index is also undef 2435 if (SHFIndices[i] == -1) 2436 SHFIndices[i] = Idx; 2437 2438 // Check that non-undef values are the same as in the mask. If they 2439 // aren't then give up 2440 if (!(Idx == -1 || Idx == SHFIndices[i])) 2441 return SDValue(); 2442 } 2443 } 2444 2445 // Calculate the immediate. Replace any remaining undefs with zero 2446 APInt Imm(32, 0); 2447 for (int i = 3; i >= 0; --i) { 2448 int Idx = SHFIndices[i]; 2449 2450 if (Idx == -1) 2451 Idx = 0; 2452 2453 Imm <<= 2; 2454 Imm |= Idx & 0x3; 2455 } 2456 2457 return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy, 2458 DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); 2459 } 2460 2461 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2462 // 2463 // ILVEV interleaves the even elements from each vector. 2464 // 2465 // It is possible to lower into ILVEV when the mask takes the form: 2466 // <0, n, 2, n+2, 4, n+4, ...> 2467 // where n is the number of elements in the vector. 
2468 // 2469 // When undef's appear in the mask they are treated as if they were whatever 2470 // value is necessary in order to fit the above form. 2471 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2472 SmallVector<int, 16> Indices, 2473 SelectionDAG &DAG) { 2474 assert ((Indices.size() % 2) == 0); 2475 int WsIdx = 0; 2476 int WtIdx = ResTy.getVectorNumElements(); 2477 2478 for (unsigned i = 0; i < Indices.size(); i += 2) { 2479 if (Indices[i] != -1 && Indices[i] != WsIdx) 2480 return SDValue(); 2481 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2482 return SDValue(); 2483 WsIdx += 2; 2484 WtIdx += 2; 2485 } 2486 2487 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0), 2488 Op->getOperand(1)); 2489 } 2490 2491 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2492 // 2493 // ILVOD interleaves the odd elements from each vector. 2494 // 2495 // It is possible to lower into ILVOD when the mask takes the form: 2496 // <1, n+1, 3, n+3, 5, n+5, ...> 2497 // where n is the number of elements in the vector. 2498 // 2499 // When undef's appear in the mask they are treated as if they were whatever 2500 // value is necessary in order to fit the above form. 2501 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2502 SmallVector<int, 16> Indices, 2503 SelectionDAG &DAG) { 2504 assert ((Indices.size() % 2) == 0); 2505 int WsIdx = 1; 2506 int WtIdx = ResTy.getVectorNumElements() + 1; 2507 2508 for (unsigned i = 0; i < Indices.size(); i += 2) { 2509 if (Indices[i] != -1 && Indices[i] != WsIdx) 2510 return SDValue(); 2511 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2512 return SDValue(); 2513 WsIdx += 2; 2514 WtIdx += 2; 2515 } 2516 2517 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0), 2518 Op->getOperand(1)); 2519 } 2520 2521 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2522 // 2523 // ILVL interleaves consecutive elements from the left half of each vector. 
2524 // 2525 // It is possible to lower into ILVL when the mask takes the form: 2526 // <0, n, 1, n+1, 2, n+2, ...> 2527 // where n is the number of elements in the vector. 2528 // 2529 // When undef's appear in the mask they are treated as if they were whatever 2530 // value is necessary in order to fit the above form. 2531 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2532 SmallVector<int, 16> Indices, 2533 SelectionDAG &DAG) { 2534 assert ((Indices.size() % 2) == 0); 2535 int WsIdx = 0; 2536 int WtIdx = ResTy.getVectorNumElements(); 2537 2538 for (unsigned i = 0; i < Indices.size(); i += 2) { 2539 if (Indices[i] != -1 && Indices[i] != WsIdx) 2540 return SDValue(); 2541 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2542 return SDValue(); 2543 WsIdx ++; 2544 WtIdx ++; 2545 } 2546 2547 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0), 2548 Op->getOperand(1)); 2549 } 2550 2551 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2552 // 2553 // ILVR interleaves consecutive elements from the right half of each vector. 2554 // 2555 // It is possible to lower into ILVR when the mask takes the form: 2556 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2557 // where n is the number of elements in the vector and x is half n. 2558 // 2559 // When undef's appear in the mask they are treated as if they were whatever 2560 // value is necessary in order to fit the above form. 
2561 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2562 SmallVector<int, 16> Indices, 2563 SelectionDAG &DAG) { 2564 assert ((Indices.size() % 2) == 0); 2565 unsigned NumElts = ResTy.getVectorNumElements(); 2566 int WsIdx = NumElts / 2; 2567 int WtIdx = NumElts + NumElts / 2; 2568 2569 for (unsigned i = 0; i < Indices.size(); i += 2) { 2570 if (Indices[i] != -1 && Indices[i] != WsIdx) 2571 return SDValue(); 2572 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2573 return SDValue(); 2574 WsIdx ++; 2575 WtIdx ++; 2576 } 2577 2578 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0), 2579 Op->getOperand(1)); 2580 } 2581 2582 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2583 // 2584 // PCKEV copies the even elements of each vector into the result vector. 2585 // 2586 // It is possible to lower into PCKEV when the mask takes the form: 2587 // <0, 2, 4, ..., n, n+2, n+4, ...> 2588 // where n is the number of elements in the vector. 2589 // 2590 // When undef's appear in the mask they are treated as if they were whatever 2591 // value is necessary in order to fit the above form. 2592 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2593 SmallVector<int, 16> Indices, 2594 SelectionDAG &DAG) { 2595 assert ((Indices.size() % 2) == 0); 2596 int Idx = 0; 2597 2598 for (unsigned i = 0; i < Indices.size(); ++i) { 2599 if (Indices[i] != -1 && Indices[i] != Idx) 2600 return SDValue(); 2601 Idx += 2; 2602 } 2603 2604 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0), 2605 Op->getOperand(1)); 2606 } 2607 2608 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2609 // 2610 // PCKOD copies the odd elements of each vector into the result vector. 2611 // 2612 // It is possible to lower into PCKOD when the mask takes the form: 2613 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2614 // where n is the number of elements in the vector. 
2615 // 2616 // When undef's appear in the mask they are treated as if they were whatever 2617 // value is necessary in order to fit the above form. 2618 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2619 SmallVector<int, 16> Indices, 2620 SelectionDAG &DAG) { 2621 assert ((Indices.size() % 2) == 0); 2622 int Idx = 1; 2623 2624 for (unsigned i = 0; i < Indices.size(); ++i) { 2625 if (Indices[i] != -1 && Indices[i] != Idx) 2626 return SDValue(); 2627 Idx += 2; 2628 } 2629 2630 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), 2631 Op->getOperand(1)); 2632 } 2633 2634 // Lower VECTOR_SHUFFLE into VSHF. 2635 // 2636 // This mostly consists of converting the shuffle indices in Indices into a 2637 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2638 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2639 // if the type is v8i16 and all the indices are less than 8 then the second 2640 // operand is unused and can be replaced with anything. We choose to replace it 2641 // with the used operand since this reduces the number of instructions overall. 
2642 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2643 SmallVector<int, 16> Indices, 2644 SelectionDAG &DAG) { 2645 SmallVector<SDValue, 16> Ops; 2646 SDValue Op0; 2647 SDValue Op1; 2648 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2649 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2650 bool Using1stVec = false; 2651 bool Using2ndVec = false; 2652 SDLoc DL(Op); 2653 int ResTyNumElts = ResTy.getVectorNumElements(); 2654 2655 for (int i = 0; i < ResTyNumElts; ++i) { 2656 // Idx == -1 means UNDEF 2657 int Idx = Indices[i]; 2658 2659 if (0 <= Idx && Idx < ResTyNumElts) 2660 Using1stVec = true; 2661 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2662 Using2ndVec = true; 2663 } 2664 2665 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 2666 ++I) 2667 Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); 2668 2669 SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, Ops); 2670 2671 if (Using1stVec && Using2ndVec) { 2672 Op0 = Op->getOperand(0); 2673 Op1 = Op->getOperand(1); 2674 } else if (Using1stVec) 2675 Op0 = Op1 = Op->getOperand(0); 2676 else if (Using2ndVec) 2677 Op0 = Op1 = Op->getOperand(1); 2678 else 2679 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2680 2681 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 2682 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 2683 // VSHF concatenates the vectors in a bitwise fashion: 2684 // <0b00, 0b01> + <0b10, 0b11> -> 2685 // 0b0100 + 0b1110 -> 0b01001110 2686 // <0b10, 0b11, 0b00, 0b01> 2687 // We must therefore swap the operands to get the correct result. 2688 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 2689 } 2690 2691 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2692 // indices in the shuffle. 
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  // Only 128-bit (MSA-sized) vectors are handled here.
  if (!ResTy.is128BitVector())
    return SDValue();

  int ResTyNumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;

  // Collect the shuffle mask; -1 entries denote undef lanes.
  for (int i = 0; i < ResTyNumElts; ++i)
    Indices.push_back(Node->getMaskElt(i));

  // Try each specialized shuffle instruction in turn, then fall back on the
  // general-purpose VSHF which can implement any mask.
  SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}

// Expands the BPOSGE32 pseudo into a control-flow diamond: branch with
// BPOSGE32, materialize 0 on the false path and 1 on the true path, and join
// the two values with a PHI in the sink block.
MachineBasicBlock * MipsSETargetLowering::
emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  // $bb:
  //  bposge32 $tbb
  // $fbb:
  //  li $vr2, 0
  //  b $sink
  // $tbb:
  //  li $vr1, 1
  // $sink:
  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI->getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bposge32 instruction to $BB.
  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);

  // Fill $FBB.
  unsigned VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  unsigned VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI->getOperand(0).getReg())
    .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return Sink;
}

// Expands an MSA branch-on-condition pseudo (e.g. vany_nonzero) into the same
// kind of diamond as emitBPOSGE32: the condition branch BranchOp selects
// between a block producing 0 and a block producing 1, joined by a PHI.
MachineBasicBlock * MipsSETargetLowering::
emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
                     unsigned BranchOp) const{
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI->getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real branch instruction (BranchOp, e.g. bnz.b) to $BB.
  BuildMI(BB, DL, TII->get(BranchOp))
    .addReg(MI->getOperand(1).getReg())
    .addMBB(TBB);

  // Fill $FBB.
  unsigned RD1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  unsigned RD2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI->getOperand(0).getReg())
    .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return Sink;
}

// Emit the COPY_FW pseudo instruction.
//
// copy_fw_pseudo $fd, $ws, n
// =>
// splati.w $wt, $ws[$n]
// copy $fd, $wt:sub_lo
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is never valid
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Fd = MI->getOperand(0).getReg();
  unsigned Ws = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();

  if (Lane == 0)
    // Lane 0 overlaps $ws:sub_lo; a plain subregister COPY suffices and can
    // often be coalesced away entirely.
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
  else {
    // Move the requested lane to lane 0 first, then copy the low
    // subregister out.
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 is the only mode supported by MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Fd = MI->getOperand(0).getReg();
  unsigned Ws = MI->getOperand(1).getReg();
  // NOTE(review): the lane operand is scaled by 2 here but only tested
  // against zero below — presumably units of 32-bit sub-elements; confirm.
  unsigned Lane = MI->getOperand(2).getImm() * 2;
  DebugLoc DL = MI->getDebugLoc();

  if (Lane == 0)
    // Lane 0 overlaps $ws:sub_64; a plain subregister COPY suffices.
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    // A 128-bit vector holds two doubles, so the only non-zero source lane
    // is 1: splat it to lane 0, then copy the low 64 bits out.
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Wd_in = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();
  unsigned Fs = MI->getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);

  // Wrap the scalar FPR in a vector register (only sub_lo is defined) so
  // insve.w can read element 0 from it.
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  // Insert element 0 of $wt into lane $n of $wd_in, producing $wd.
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Wd_in = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();
  unsigned Fs = MI->getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Wrap the scalar FPR in a vector register (only sub_64 is defined) so
  // insve.d can read element 0 from it.
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  // Insert element 0 of $wt into lane $n of $wd_in, producing $wd.
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
                                         MachineBasicBlock *BB,
                                         unsigned EltSizeInBytes,
                                         bool IsFP) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned SrcVecReg = MI->getOperand(1).getReg();
  unsigned LaneReg = MI->getOperand(2).getReg();
  unsigned SrcValReg = MI->getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  const TargetRegisterClass *GPRRC =
      Subtarget->isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  // Pick the element-width-specific opcodes and register class.
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    // Wrap the scalar FPR in a vector register so INSVE can read element 0.
    unsigned Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index
  if (EltSizeInBytes != 1) {
    unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SLL), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg);

  unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this. The negation (the NEG of the pseudo
  // expansion above) is implemented as a SUB from $zero.
  unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SUB), LaneTmp2)
      .addReg(Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
3119 // 3120 // fill_fw_pseudo $wd, $fs 3121 // => 3122 // implicit_def $wt1 3123 // insert_subreg $wt2:subreg_lo, $wt1, $fs 3124 // splati.w $wd, $wt2[0] 3125 MachineBasicBlock * 3126 MipsSETargetLowering::emitFILL_FW(MachineInstr *MI, 3127 MachineBasicBlock *BB) const { 3128 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3129 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3130 DebugLoc DL = MI->getDebugLoc(); 3131 unsigned Wd = MI->getOperand(0).getReg(); 3132 unsigned Fs = MI->getOperand(1).getReg(); 3133 unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3134 unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3135 3136 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3137 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3138 .addReg(Wt1) 3139 .addReg(Fs) 3140 .addImm(Mips::sub_lo); 3141 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0); 3142 3143 MI->eraseFromParent(); // The pseudo instruction is gone now. 3144 return BB; 3145 } 3146 3147 // Emit the FILL_FD pseudo instruction. 
3148 // 3149 // fill_fd_pseudo $wd, $fs 3150 // => 3151 // implicit_def $wt1 3152 // insert_subreg $wt2:subreg_64, $wt1, $fs 3153 // splati.d $wd, $wt2[0] 3154 MachineBasicBlock * 3155 MipsSETargetLowering::emitFILL_FD(MachineInstr *MI, 3156 MachineBasicBlock *BB) const { 3157 assert(Subtarget->isFP64bit()); 3158 3159 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3160 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3161 DebugLoc DL = MI->getDebugLoc(); 3162 unsigned Wd = MI->getOperand(0).getReg(); 3163 unsigned Fs = MI->getOperand(1).getReg(); 3164 unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3165 unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3166 3167 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3168 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3169 .addReg(Wt1) 3170 .addReg(Fs) 3171 .addImm(Mips::sub_64); 3172 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0); 3173 3174 MI->eraseFromParent(); // The pseudo instruction is gone now. 3175 return BB; 3176 } 3177 3178 // Emit the FEXP2_W_1 pseudo instructions. 
3179 // 3180 // fexp2_w_1_pseudo $wd, $wt 3181 // => 3182 // ldi.w $ws, 1 3183 // fexp2.w $wd, $ws, $wt 3184 MachineBasicBlock * 3185 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, 3186 MachineBasicBlock *BB) const { 3187 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3188 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3189 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3190 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3191 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3192 DebugLoc DL = MI->getDebugLoc(); 3193 3194 // Splat 1.0 into a vector 3195 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3196 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3197 3198 // Emit 1.0 * fexp2(Wt) 3199 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI->getOperand(0).getReg()) 3200 .addReg(Ws2) 3201 .addReg(MI->getOperand(1).getReg()); 3202 3203 MI->eraseFromParent(); // The pseudo instruction is gone now. 3204 return BB; 3205 } 3206 3207 // Emit the FEXP2_D_1 pseudo instructions. 
3208 // 3209 // fexp2_d_1_pseudo $wd, $wt 3210 // => 3211 // ldi.d $ws, 1 3212 // fexp2.d $wd, $ws, $wt 3213 MachineBasicBlock * 3214 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI, 3215 MachineBasicBlock *BB) const { 3216 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3217 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3218 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3219 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3220 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3221 DebugLoc DL = MI->getDebugLoc(); 3222 3223 // Splat 1.0 into a vector 3224 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3225 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3226 3227 // Emit 1.0 * fexp2(Wt) 3228 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI->getOperand(0).getReg()) 3229 .addReg(Ws2) 3230 .addReg(MI->getOperand(1).getReg()); 3231 3232 MI->eraseFromParent(); // The pseudo instruction is gone now. 3233 return BB; 3234 } 3235