1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// 2 // The LLVM Compiler Infrastructure 3 // 4 // This file is distributed under the University of Illinois Open Source 5 // License. See LICENSE.TXT for details. 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SPUTargetLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "SPUISelLowering.h" 14 #include "SPUTargetMachine.h" 15 #include "SPUFrameLowering.h" 16 #include "SPUMachineFunction.h" 17 #include "llvm/Constants.h" 18 #include "llvm/Function.h" 19 #include "llvm/Intrinsics.h" 20 #include "llvm/CallingConv.h" 21 #include "llvm/Type.h" 22 #include "llvm/CodeGen/CallingConvLower.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/CodeGen/SelectionDAG.h" 28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29 #include "llvm/Target/TargetOptions.h" 30 #include "llvm/ADT/VectorExtras.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include "llvm/Support/MathExtras.h" 34 #include "llvm/Support/raw_ostream.h" 35 #include <map> 36 37 using namespace llvm; 38 39 // Used in getTargetNodeName() below 40 namespace { 41 std::map<unsigned, const char *> node_names; 42 43 // Byte offset of the preferred slot (counted from the MSB) 44 int prefslotOffset(EVT VT) { 45 int retval=0; 46 if (VT==MVT::i1) retval=3; 47 if (VT==MVT::i8) retval=3; 48 if (VT==MVT::i16) retval=2; 49 50 return retval; 51 } 52 53 //! Expand a library call into an actual call DAG node 54 /*! 55 \note 56 This code is taken from SelectionDAGLegalize, since it is not exposed as 57 part of the LLVM SelectionDAG API. 58 */ 59 60 SDValue 61 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, 62 bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { 63 // The input chain to this libcall is the entry node of the function. 64 // Legalizing the call will automatically add the previous call to the 65 // dependence. 66 SDValue InChain = DAG.getEntryNode(); 67 68 TargetLowering::ArgListTy Args; 69 TargetLowering::ArgListEntry Entry; 70 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { 71 EVT ArgVT = Op.getOperand(i).getValueType(); 72 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); 73 Entry.Node = Op.getOperand(i); 74 Entry.Ty = ArgTy; 75 Entry.isSExt = isSigned; 76 Entry.isZExt = !isSigned; 77 Args.push_back(Entry); 78 } 79 SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), 80 TLI.getPointerTy()); 81 82 // Splice the libcall in wherever FindInputOutputChains tells us to. 83 Type *RetTy = 84 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); 85 std::pair<SDValue, SDValue> CallInfo = 86 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 87 0, TLI.getLibcallCallingConv(LC), false, 88 /*isReturnValueUsed=*/true, 89 Callee, Args, DAG, Op.getDebugLoc()); 90 91 return CallInfo.first; 92 } 93 } 94 95 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) 96 : TargetLowering(TM, new TargetLoweringObjectFileELF()), 97 SPUTM(TM) { 98 99 // Use _setjmp/_longjmp instead of setjmp/longjmp. 100 setUseUnderscoreSetJmp(true); 101 setUseUnderscoreLongJmp(true); 102 103 // Set RTLIB libcall names as used by SPU: 104 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); 105 106 // Set up the SPU's register classes: 107 addRegisterClass(MVT::i8, SPU::R8CRegisterClass); 108 addRegisterClass(MVT::i16, SPU::R16CRegisterClass); 109 addRegisterClass(MVT::i32, SPU::R32CRegisterClass); 110 addRegisterClass(MVT::i64, SPU::R64CRegisterClass); 111 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); 112 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); 113 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); 114 115 // SPU has no sign or zero extended loads for i1, i8, i16: 116 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); 117 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 118 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 119 120 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 121 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); 122 123 setTruncStoreAction(MVT::i128, MVT::i64, Expand); 124 setTruncStoreAction(MVT::i128, MVT::i32, Expand); 125 setTruncStoreAction(MVT::i128, MVT::i16, Expand); 126 setTruncStoreAction(MVT::i128, MVT::i8, Expand); 127 128 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 129 130 // SPU constant load actions are custom lowered: 131 setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 132 setOperationAction(ISD::ConstantFP, MVT::f64, Custom); 133 134 // SPU's loads and stores have to be custom lowered: 135 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; 136 ++sctype) { 137 MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; 138 139 setOperationAction(ISD::LOAD, VT, Custom); 140 setOperationAction(ISD::STORE, VT, Custom); 141 setLoadExtAction(ISD::EXTLOAD, VT, Custom); 142 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); 143 setLoadExtAction(ISD::SEXTLOAD, VT, Custom); 144 145 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { 146 MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; 147 setTruncStoreAction(VT, StoreVT, Expand); 148 } 149 } 150 151 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; 152 ++sctype) { 153 MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; 154 155 setOperationAction(ISD::LOAD, VT, Custom); 156 setOperationAction(ISD::STORE, VT, Custom); 157 158 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { 159 MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; 160 setTruncStoreAction(VT, StoreVT, Expand); 161 } 162 } 163 164 // Expand the jumptable branches 165 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 166 setOperationAction(ISD::BR_CC, MVT::Other, Expand); 167 168 // Custom lower SELECT_CC for most cases, but expand by default 169 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 170 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); 171 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); 172 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 173 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); 174 175 // SPU has no intrinsics for these particular operations: 176 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); 177 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); 178 179 // SPU has no division/remainder instructions 180 setOperationAction(ISD::SREM, MVT::i8, Expand); 181 setOperationAction(ISD::UREM, MVT::i8, Expand); 182 setOperationAction(ISD::SDIV, MVT::i8, Expand); 183 setOperationAction(ISD::UDIV, MVT::i8, Expand); 184 setOperationAction(ISD::SDIVREM, MVT::i8, Expand); 185 setOperationAction(ISD::UDIVREM, MVT::i8, Expand); 186 setOperationAction(ISD::SREM, MVT::i16, Expand); 187 setOperationAction(ISD::UREM, MVT::i16, Expand); 188 setOperationAction(ISD::SDIV, MVT::i16, Expand); 189 setOperationAction(ISD::UDIV, MVT::i16, Expand); 190 setOperationAction(ISD::SDIVREM, MVT::i16, Expand); 191 setOperationAction(ISD::UDIVREM, MVT::i16, Expand); 192 setOperationAction(ISD::SREM, MVT::i32, Expand); 193 setOperationAction(ISD::UREM, MVT::i32, Expand); 194 setOperationAction(ISD::SDIV, MVT::i32, Expand); 195 setOperationAction(ISD::UDIV, MVT::i32, Expand); 196 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 197 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 198 setOperationAction(ISD::SREM, MVT::i64, Expand); 199 setOperationAction(ISD::UREM, MVT::i64, Expand); 200 setOperationAction(ISD::SDIV, MVT::i64, Expand); 201 setOperationAction(ISD::UDIV, MVT::i64, Expand); 202 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 203 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 204 setOperationAction(ISD::SREM, MVT::i128, Expand); 205 setOperationAction(ISD::UREM, MVT::i128, Expand); 206 setOperationAction(ISD::SDIV, MVT::i128, Expand); 207 setOperationAction(ISD::UDIV, MVT::i128, Expand); 208 setOperationAction(ISD::SDIVREM, MVT::i128, Expand); 209 setOperationAction(ISD::UDIVREM, MVT::i128, Expand); 210 211 // We don't support sin/cos/sqrt/fmod 212 setOperationAction(ISD::FSIN , MVT::f64, Expand); 213 setOperationAction(ISD::FCOS , MVT::f64, Expand); 214 setOperationAction(ISD::FREM , MVT::f64, Expand); 215 setOperationAction(ISD::FSIN , MVT::f32, Expand); 216 setOperationAction(ISD::FCOS , MVT::f32, Expand); 217 setOperationAction(ISD::FREM , MVT::f32, Expand); 218 219 // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt 220 // for f32!) 221 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 222 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 223 224 setOperationAction(ISD::FMA, MVT::f64, Expand); 225 setOperationAction(ISD::FMA, MVT::f32, Expand); 226 227 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 228 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 229 230 // SPU can do rotate right and left, so legalize it... but customize for i8 231 // because instructions don't exist. 232 233 // FIXME: Change from "expand" to appropriate type once ROTR is supported in 234 // .td files. 235 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); 236 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); 237 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); 238 239 setOperationAction(ISD::ROTL, MVT::i32, Legal); 240 setOperationAction(ISD::ROTL, MVT::i16, Legal); 241 setOperationAction(ISD::ROTL, MVT::i8, Custom); 242 243 // SPU has no native version of shift left/right for i8 244 setOperationAction(ISD::SHL, MVT::i8, Custom); 245 setOperationAction(ISD::SRL, MVT::i8, Custom); 246 setOperationAction(ISD::SRA, MVT::i8, Custom); 247 248 // Make these operations legal and handle them during instruction selection: 249 setOperationAction(ISD::SHL, MVT::i64, Legal); 250 setOperationAction(ISD::SRL, MVT::i64, Legal); 251 setOperationAction(ISD::SRA, MVT::i64, Legal); 252 253 // Custom lower i8, i32 and i64 multiplications 254 setOperationAction(ISD::MUL, MVT::i8, Custom); 255 setOperationAction(ISD::MUL, MVT::i32, Legal); 256 setOperationAction(ISD::MUL, MVT::i64, Legal); 257 258 // Expand double-width multiplication 259 // FIXME: It would probably be reasonable to support some of these operations 260 setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); 261 setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); 262 setOperationAction(ISD::MULHU, MVT::i8, Expand); 263 setOperationAction(ISD::MULHS, MVT::i8, Expand); 264 setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); 265 setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); 266 setOperationAction(ISD::MULHU, MVT::i16, Expand); 267 setOperationAction(ISD::MULHS, MVT::i16, Expand); 268 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 269 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 270 setOperationAction(ISD::MULHU, MVT::i32, Expand); 271 setOperationAction(ISD::MULHS, MVT::i32, Expand); 272 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 273 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 274 setOperationAction(ISD::MULHU, MVT::i64, Expand); 275 setOperationAction(ISD::MULHS, MVT::i64, Expand); 276 277 // Need to custom handle (some) common i8, i64 math ops 278 setOperationAction(ISD::ADD, MVT::i8, Custom); 279 setOperationAction(ISD::ADD, MVT::i64, Legal); 280 setOperationAction(ISD::SUB, MVT::i8, Custom); 281 setOperationAction(ISD::SUB, MVT::i64, Legal); 282 283 // SPU does not have BSWAP. It does have i32 support CTLZ. 284 // CTPOP has to be custom lowered. 285 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 286 setOperationAction(ISD::BSWAP, MVT::i64, Expand); 287 288 setOperationAction(ISD::CTPOP, MVT::i8, Custom); 289 setOperationAction(ISD::CTPOP, MVT::i16, Custom); 290 setOperationAction(ISD::CTPOP, MVT::i32, Custom); 291 setOperationAction(ISD::CTPOP, MVT::i64, Custom); 292 setOperationAction(ISD::CTPOP, MVT::i128, Expand); 293 294 setOperationAction(ISD::CTTZ , MVT::i8, Expand); 295 setOperationAction(ISD::CTTZ , MVT::i16, Expand); 296 setOperationAction(ISD::CTTZ , MVT::i32, Expand); 297 setOperationAction(ISD::CTTZ , MVT::i64, Expand); 298 setOperationAction(ISD::CTTZ , MVT::i128, Expand); 299 300 setOperationAction(ISD::CTLZ , MVT::i8, Promote); 301 setOperationAction(ISD::CTLZ , MVT::i16, Promote); 302 setOperationAction(ISD::CTLZ , MVT::i32, Legal); 303 setOperationAction(ISD::CTLZ , MVT::i64, Expand); 304 setOperationAction(ISD::CTLZ , MVT::i128, Expand); 305 306 // SPU has a version of select that implements (a&~c)|(b&c), just like 307 // select ought to work: 308 setOperationAction(ISD::SELECT, MVT::i8, Legal); 309 setOperationAction(ISD::SELECT, MVT::i16, Legal); 310 setOperationAction(ISD::SELECT, MVT::i32, Legal); 311 setOperationAction(ISD::SELECT, MVT::i64, Legal); 312 313 setOperationAction(ISD::SETCC, MVT::i8, Legal); 314 setOperationAction(ISD::SETCC, MVT::i16, Legal); 315 setOperationAction(ISD::SETCC, MVT::i32, Legal); 316 setOperationAction(ISD::SETCC, MVT::i64, Legal); 317 setOperationAction(ISD::SETCC, MVT::f64, Custom); 318 319 // Custom lower i128 -> i64 truncates 320 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); 321 322 // Custom lower i32/i64 -> i128 sign extend 323 setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); 324 325 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); 326 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); 327 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); 328 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); 329 // SPU has a legal FP -> signed INT instruction for f32, but for f64, need 330 // to expand to a libcall, hence the custom lowering: 331 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 332 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 333 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); 334 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 335 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); 336 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); 337 338 // FDIV on SPU requires custom lowering 339 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall 340 341 // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: 342 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 343 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); 344 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); 345 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 346 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); 347 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); 348 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 349 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 350 351 setOperationAction(ISD::BITCAST, MVT::i32, Legal); 352 setOperationAction(ISD::BITCAST, MVT::f32, Legal); 353 setOperationAction(ISD::BITCAST, MVT::i64, Legal); 354 setOperationAction(ISD::BITCAST, MVT::f64, Legal); 355 356 // We cannot sextinreg(i1). Expand to shifts. 357 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 358 359 // We want to legalize GlobalAddress and ConstantPool nodes into the 360 // appropriate instructions to materialize the address. 361 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; 362 ++sctype) { 363 MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; 364 365 setOperationAction(ISD::GlobalAddress, VT, Custom); 366 setOperationAction(ISD::ConstantPool, VT, Custom); 367 setOperationAction(ISD::JumpTable, VT, Custom); 368 } 369 370 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 371 setOperationAction(ISD::VASTART , MVT::Other, Custom); 372 373 // Use the default implementation. 374 setOperationAction(ISD::VAARG , MVT::Other, Expand); 375 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 376 setOperationAction(ISD::VAEND , MVT::Other, Expand); 377 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 378 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); 379 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 380 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); 381 382 // Cell SPU has instructions for converting between i64 and fp. 383 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 384 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 385 386 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT 387 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); 388 389 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 390 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 391 392 // First set operation action for all vector types to expand. Then we 393 // will selectively turn on ones that can be effectively codegen'd. 394 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); 395 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); 396 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); 397 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); 398 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); 399 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); 400 401 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 402 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { 403 MVT::SimpleValueType VT = (MVT::SimpleValueType)i; 404 405 // Set operation actions to legal types only. 406 if (!isTypeLegal(VT)) continue; 407 408 // add/sub are legal for all supported vector VT's. 409 setOperationAction(ISD::ADD, VT, Legal); 410 setOperationAction(ISD::SUB, VT, Legal); 411 // mul has to be custom lowered. 412 setOperationAction(ISD::MUL, VT, Legal); 413 414 setOperationAction(ISD::AND, VT, Legal); 415 setOperationAction(ISD::OR, VT, Legal); 416 setOperationAction(ISD::XOR, VT, Legal); 417 setOperationAction(ISD::LOAD, VT, Custom); 418 setOperationAction(ISD::SELECT, VT, Legal); 419 setOperationAction(ISD::STORE, VT, Custom); 420 421 // These operations need to be expanded: 422 setOperationAction(ISD::SDIV, VT, Expand); 423 setOperationAction(ISD::SREM, VT, Expand); 424 setOperationAction(ISD::UDIV, VT, Expand); 425 setOperationAction(ISD::UREM, VT, Expand); 426 427 // Custom lower build_vector, constant pool spills, insert and 428 // extract vector elements: 429 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 430 setOperationAction(ISD::ConstantPool, VT, Custom); 431 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 432 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 433 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 434 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 435 } 436 437 setOperationAction(ISD::AND, MVT::v16i8, Custom); 438 setOperationAction(ISD::OR, MVT::v16i8, Custom); 439 setOperationAction(ISD::XOR, MVT::v16i8, Custom); 440 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 441 442 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 443 444 setBooleanContents(ZeroOrNegativeOneBooleanContent); 445 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct? 446 447 setStackPointerRegisterToSaveRestore(SPU::R1); 448 449 // We have target-specific dag combine patterns for the following nodes: 450 setTargetDAGCombine(ISD::ADD); 451 setTargetDAGCombine(ISD::ZERO_EXTEND); 452 setTargetDAGCombine(ISD::SIGN_EXTEND); 453 setTargetDAGCombine(ISD::ANY_EXTEND); 454 455 setMinFunctionAlignment(3); 456 457 computeRegisterProperties(); 458 459 // Set pre-RA register scheduler default to BURR, which produces slightly 460 // better code than the default (could also be TDRR, but TargetLowering.h 461 // needs a mod to support that model): 462 setSchedulingPreference(Sched::RegPressure); 463 } 464 465 const char * 466 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const 467 { 468 if (node_names.empty()) { 469 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; 470 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; 471 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; 472 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; 473 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; 474 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; 475 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; 476 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; 477 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; 478 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; 479 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; 480 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; 481 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; 482 node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS"; 483 node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES"; 484 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; 485 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; 486 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; 487 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = 488 "SPUISD::ROTBYTES_LEFT_BITS"; 489 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; 490 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; 491 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; 492 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; 493 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; 494 } 495 496 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode); 497 498 return ((i != node_names.end()) ? i->second : 0); 499 } 500 501 //===----------------------------------------------------------------------===// 502 // Return the Cell SPU's SETCC result type 503 //===----------------------------------------------------------------------===// 504 505 EVT SPUTargetLowering::getSetCCResultType(EVT VT) const { 506 // i8, i16 and i32 are valid SETCC result types 507 MVT::SimpleValueType retval; 508 509 switch(VT.getSimpleVT().SimpleTy){ 510 case MVT::i1: 511 case MVT::i8: 512 retval = MVT::i8; break; 513 case MVT::i16: 514 retval = MVT::i16; break; 515 case MVT::i32: 516 default: 517 retval = MVT::i32; 518 } 519 return retval; 520 } 521 522 //===----------------------------------------------------------------------===// 523 // Calling convention code: 524 //===----------------------------------------------------------------------===// 525 526 #include "SPUGenCallingConv.inc" 527 528 //===----------------------------------------------------------------------===// 529 // LowerOperation implementation 530 //===----------------------------------------------------------------------===// 531 532 /// Custom lower loads for CellSPU 533 /*! 534 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements 535 within a 16-byte block, we have to rotate to extract the requested element. 536 537 For extending loads, we also want to ensure that the following sequence is 538 emitted, e.g. for MVT::f32 extending load to MVT::f64: 539 540 \verbatim 541 %1 v16i8,ch = load 542 %2 v16i8,ch = rotate %1 543 %3 v4f8, ch = bitconvert %2 544 %4 f32 = vec2perfslot %3 545 %5 f64 = fp_extend %4 546 \endverbatim 547 */ 548 static SDValue 549 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 550 LoadSDNode *LN = cast<LoadSDNode>(Op); 551 SDValue the_chain = LN->getChain(); 552 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 553 EVT InVT = LN->getMemoryVT(); 554 EVT OutVT = Op.getValueType(); 555 ISD::LoadExtType ExtType = LN->getExtensionType(); 556 unsigned alignment = LN->getAlignment(); 557 int pso = prefslotOffset(InVT); 558 DebugLoc dl = Op.getDebugLoc(); 559 EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT, 560 (128 / InVT.getSizeInBits())); 561 562 // two sanity checks 563 assert( LN->getAddressingMode() == ISD::UNINDEXED 564 && "we should get only UNINDEXED adresses"); 565 // clean aligned loads can be selected as-is 566 if (InVT.getSizeInBits() == 128 && (alignment%16) == 0) 567 return SDValue(); 568 569 // Get pointerinfos to the memory chunk(s) that contain the data to load 570 uint64_t mpi_offset = LN->getPointerInfo().Offset; 571 mpi_offset -= mpi_offset%16; 572 MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset); 573 MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16); 574 575 SDValue result; 576 SDValue basePtr = LN->getBasePtr(); 577 SDValue rotate; 578 579 if ((alignment%16) == 0) { 580 ConstantSDNode *CN; 581 582 // Special cases for a known aligned load to simplify the base pointer 583 // and the rotation amount: 584 if (basePtr.getOpcode() == ISD::ADD 585 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) { 586 // Known offset into basePtr 587 int64_t offset = CN->getSExtValue(); 588 int64_t rotamt = int64_t((offset & 0xf) - pso); 589 590 if (rotamt < 0) 591 rotamt += 16; 592 593 rotate = DAG.getConstant(rotamt, MVT::i16); 594 595 // Simplify the base pointer for this case: 596 basePtr = basePtr.getOperand(0); 597 if ((offset & ~0xf) > 0) { 598 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 599 basePtr, 600 DAG.getConstant((offset & ~0xf), PtrVT)); 601 } 602 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) 603 || (basePtr.getOpcode() == SPUISD::IndirectAddr 604 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi 605 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { 606 // Plain aligned a-form address: rotate into preferred slot 607 // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) 608 int64_t rotamt = -pso; 609 if (rotamt < 0) 610 rotamt += 16; 611 rotate = DAG.getConstant(rotamt, MVT::i16); 612 } else { 613 // Offset the rotate amount by the basePtr and the preferred slot 614 // byte offset 615 int64_t rotamt = -pso; 616 if (rotamt < 0) 617 rotamt += 16; 618 rotate = DAG.getNode(ISD::ADD, dl, PtrVT, 619 basePtr, 620 DAG.getConstant(rotamt, PtrVT)); 621 } 622 } else { 623 // Unaligned load: must be more pessimistic about addressing modes: 624 if (basePtr.getOpcode() == ISD::ADD) { 625 MachineFunction &MF = DAG.getMachineFunction(); 626 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 627 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 628 SDValue Flag; 629 630 SDValue Op0 = basePtr.getOperand(0); 631 SDValue Op1 = basePtr.getOperand(1); 632 633 if (isa<ConstantSDNode>(Op1)) { 634 // Convert the (add <ptr>, <const>) to an indirect address contained 635 // in a register. Note that this is done because we need to avoid 636 // creating a 0(reg) d-form address due to the SPU's block loads. 637 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); 638 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); 639 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); 640 } else { 641 // Convert the (add <arg1>, <arg2>) to an indirect address, which 642 // will likely be lowered as a reg(reg) x-form address. 643 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); 644 } 645 } else { 646 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 647 basePtr, 648 DAG.getConstant(0, PtrVT)); 649 } 650 651 // Offset the rotate amount by the basePtr and the preferred slot 652 // byte offset 653 rotate = DAG.getNode(ISD::ADD, dl, PtrVT, 654 basePtr, 655 DAG.getConstant(-pso, PtrVT)); 656 } 657 658 // Do the load as a i128 to allow possible shifting 659 SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr, 660 lowMemPtr, 661 LN->isVolatile(), LN->isNonTemporal(), 16); 662 663 // When the size is not greater than alignment we get all data with just 664 // one load 665 if (alignment >= InVT.getSizeInBits()/8) { 666 // Update the chain 667 the_chain = low.getValue(1); 668 669 // Rotate into the preferred slot: 670 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128, 671 low.getValue(0), rotate); 672 673 // Convert the loaded v16i8 vector to the appropriate vector type 674 // specified by the operand: 675 EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 676 InVT, (128 / InVT.getSizeInBits())); 677 result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, 678 DAG.getNode(ISD::BITCAST, dl, vecVT, result)); 679 } 680 // When alignment is less than the size, we might need (known only at 681 // run-time) two loads 682 // TODO: if the memory address is composed only from constants, we have 683 // extra kowledge, and might avoid the second load 684 else { 685 // storage position offset from lower 16 byte aligned memory chunk 686 SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, 687 basePtr, DAG.getConstant( 0xf, MVT::i32 ) ); 688 // get a registerfull of ones. (this implementation is a workaround: LLVM 689 // cannot handle 128 bit signed int constants) 690 SDValue ones = DAG.getConstant(-1, MVT::v4i32 ); 691 ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); 692 693 SDValue high = DAG.getLoad(MVT::i128, dl, the_chain, 694 DAG.getNode(ISD::ADD, dl, PtrVT, 695 basePtr, 696 DAG.getConstant(16, PtrVT)), 697 highMemPtr, 698 LN->isVolatile(), LN->isNonTemporal(), 16); 699 700 the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), 701 high.getValue(1)); 702 703 // Shift the (possible) high part right to compensate the misalignemnt. 704 // if there is no highpart (i.e. value is i64 and offset is 4), this 705 // will zero out the high value. 706 high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high, 707 DAG.getNode(ISD::SUB, dl, MVT::i32, 708 DAG.getConstant( 16, MVT::i32), 709 offset 710 )); 711 712 // Shift the low similarly 713 // TODO: add SPUISD::SHL_BYTES 714 low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); 715 716 // Merge the two parts 717 result = DAG.getNode(ISD::BITCAST, dl, vecVT, 718 DAG.getNode(ISD::OR, dl, MVT::i128, low, high)); 719 720 if (!InVT.isVector()) { 721 result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result ); 722 } 723 724 } 725 // Handle extending loads by extending the scalar result: 726 if (ExtType == ISD::SEXTLOAD) { 727 result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); 728 } else if (ExtType == ISD::ZEXTLOAD) { 729 result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); 730 } else if (ExtType == ISD::EXTLOAD) { 731 unsigned NewOpc = ISD::ANY_EXTEND; 732 733 if (OutVT.isFloatingPoint()) 734 NewOpc = ISD::FP_EXTEND; 735 736 result = DAG.getNode(NewOpc, dl, OutVT, result); 737 } 738 739 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); 740 SDValue retops[2] = { 741 result, 742 the_chain 743 }; 744 745 result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, 746 retops, sizeof(retops) / sizeof(retops[0])); 747 return result; 748 } 749 750 /// Custom lower stores for CellSPU 751 /*! 752 All CellSPU stores are aligned to 16-byte boundaries, so for elements 753 within a 16-byte block, we have to generate a shuffle to insert the 754 requested element into its place, then store the resulting block. 755 */ 756 static SDValue 757 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 758 StoreSDNode *SN = cast<StoreSDNode>(Op); 759 SDValue Value = SN->getValue(); 760 EVT VT = Value.getValueType(); 761 EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); 762 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 763 DebugLoc dl = Op.getDebugLoc(); 764 unsigned alignment = SN->getAlignment(); 765 SDValue result; 766 EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT, 767 (128 / StVT.getSizeInBits())); 768 // Get pointerinfos to the memory chunk(s) that contain the data to load 769 uint64_t mpi_offset = SN->getPointerInfo().Offset; 770 mpi_offset -= mpi_offset%16; 771 MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset); 772 MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16); 773 774 775 // two sanity checks 776 assert( SN->getAddressingMode() == ISD::UNINDEXED 777 && "we should get only UNINDEXED adresses"); 778 // clean aligned loads can be selected as-is 779 if (StVT.getSizeInBits() == 128 && (alignment%16) == 0) 780 return SDValue(); 781 782 SDValue alignLoadVec; 783 SDValue basePtr = SN->getBasePtr(); 784 SDValue the_chain = SN->getChain(); 785 SDValue insertEltOffs; 786 787 if ((alignment%16) == 0) { 788 ConstantSDNode *CN; 789 // Special cases for a known aligned load to simplify the base pointer 790 // and insertion byte: 791 if (basePtr.getOpcode() == ISD::ADD 792 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { 793 // Known offset into basePtr 794 int64_t offset = CN->getSExtValue(); 795 796 // Simplify the base pointer for this case: 797 basePtr = basePtr.getOperand(0); 798 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 799 basePtr, 800 DAG.getConstant((offset & 0xf), PtrVT)); 801 802 if ((offset & ~0xf) > 0) { 803 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 804 basePtr, 805 DAG.getConstant((offset & ~0xf), PtrVT)); 806 } 807 } else { 808 // Otherwise, assume it's at byte 0 of basePtr 809 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 810 basePtr, 811 DAG.getConstant(0, PtrVT)); 812 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 813 basePtr, 814 DAG.getConstant(0, PtrVT)); 815 } 816 } else { 817 // Unaligned load: must be more pessimistic about addressing modes: 818 if (basePtr.getOpcode() == ISD::ADD) { 819 MachineFunction &MF = DAG.getMachineFunction(); 820 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 821 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 822 SDValue Flag; 823 824 SDValue Op0 = basePtr.getOperand(0); 825 SDValue Op1 = basePtr.getOperand(1); 826 827 if (isa<ConstantSDNode>(Op1)) { 828 // Convert the (add <ptr>, <const>) to an indirect address contained 829 // in a register. Note that this is done because we need to avoid 830 // creating a 0(reg) d-form address due to the SPU's block loads. 831 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); 832 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); 833 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); 834 } else { 835 // Convert the (add <arg1>, <arg2>) to an indirect address, which 836 // will likely be lowered as a reg(reg) x-form address. 837 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); 838 } 839 } else { 840 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 841 basePtr, 842 DAG.getConstant(0, PtrVT)); 843 } 844 845 // Insertion point is solely determined by basePtr's contents 846 insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, 847 basePtr, 848 DAG.getConstant(0, PtrVT)); 849 } 850 851 // Load the lower part of the memory to which to store. 852 SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, 853 lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16); 854 855 // if we don't need to store over the 16 byte boundary, one store suffices 856 if (alignment >= StVT.getSizeInBits()/8) { 857 // Update the chain 858 the_chain = low.getValue(1); 859 860 LoadSDNode *LN = cast<LoadSDNode>(low); 861 SDValue theValue = SN->getValue(); 862 863 if (StVT != VT 864 && (theValue.getOpcode() == ISD::AssertZext 865 || theValue.getOpcode() == ISD::AssertSext)) { 866 // Drill down and get the value for zero- and sign-extended 867 // quantities 868 theValue = theValue.getOperand(0); 869 } 870 871 // If the base pointer is already a D-form address, then just create 872 // a new D-form address with a slot offset and the orignal base pointer. 873 // Otherwise generate a D-form address with the slot offset relative 874 // to the stack pointer, which is always aligned. 875 #if !defined(NDEBUG) 876 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 877 errs() << "CellSPU LowerSTORE: basePtr = "; 878 basePtr.getNode()->dump(&DAG); 879 errs() << "\n"; 880 } 881 #endif 882 883 SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, 884 insertEltOffs); 885 SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, 886 theValue); 887 888 result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, 889 vectorizeOp, low, 890 DAG.getNode(ISD::BITCAST, dl, 891 MVT::v4i32, insertEltOp)); 892 893 result = DAG.getStore(the_chain, dl, result, basePtr, 894 lowMemPtr, 895 LN->isVolatile(), LN->isNonTemporal(), 896 16); 897 898 } 899 // do the store when it might cross the 16 byte memory access boundary. 900 else { 901 // TODO issue a warning if SN->isVolatile()== true? This is likely not 902 // what the user wanted. 903 904 // address offset from nearest lower 16byte alinged address 905 SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, 906 SN->getBasePtr(), 907 DAG.getConstant(0xf, MVT::i32)); 908 // 16 - offset 909 SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32, 910 DAG.getConstant( 16, MVT::i32), 911 offset); 912 // 16 - sizeof(Value) 913 SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32, 914 DAG.getConstant( 16, MVT::i32), 915 DAG.getConstant( VT.getSizeInBits()/8, 916 MVT::i32)); 917 // get a registerfull of ones 918 SDValue ones = DAG.getConstant(-1, MVT::v4i32); 919 ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); 920 921 // Create the 128 bit masks that have ones where the data to store is 922 // located. 923 SDValue lowmask, himask; 924 // if the value to store don't fill up the an entire 128 bits, zero 925 // out the last bits of the mask so that only the value we want to store 926 // is masked. 927 // this is e.g. in the case of store i32, align 2 928 if (!VT.isVector()){ 929 Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value); 930 lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus); 931 lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, 932 surplus); 933 Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); 934 Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask); 935 936 } 937 else { 938 lowmask = ones; 939 Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); 940 } 941 // this will zero, if there are no data that goes to the high quad 942 himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, 943 offset_compl); 944 lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask, 945 offset); 946 947 // Load in the old data and zero out the parts that will be overwritten with 948 // the new data to store. 949 SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain, 950 DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, 951 DAG.getConstant( 16, PtrVT)), 952 highMemPtr, 953 SN->isVolatile(), SN->isNonTemporal(), 16); 954 the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), 955 hi.getValue(1)); 956 957 low = DAG.getNode(ISD::AND, dl, MVT::i128, 958 DAG.getNode( ISD::BITCAST, dl, MVT::i128, low), 959 DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones)); 960 hi = DAG.getNode(ISD::AND, dl, MVT::i128, 961 DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi), 962 DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones)); 963 964 // Shift the Value to store into place. rlow contains the parts that go to 965 // the lower memory chunk, rhi has the parts that go to the upper one. 966 SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset); 967 rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask); 968 SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value, 969 offset_compl); 970 971 // Merge the old data and the new data and store the results 972 // Need to convert vectors here to integer as 'OR'ing floats assert 973 rlow = DAG.getNode(ISD::OR, dl, MVT::i128, 974 DAG.getNode(ISD::BITCAST, dl, MVT::i128, low), 975 DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow)); 976 rhi = DAG.getNode(ISD::OR, dl, MVT::i128, 977 DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi), 978 DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi)); 979 980 low = DAG.getStore(the_chain, dl, rlow, basePtr, 981 lowMemPtr, 982 SN->isVolatile(), SN->isNonTemporal(), 16); 983 hi = DAG.getStore(the_chain, dl, rhi, 984 DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, 985 DAG.getConstant( 16, PtrVT)), 986 highMemPtr, 987 SN->isVolatile(), SN->isNonTemporal(), 16); 988 result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0), 989 hi.getValue(0)); 990 } 991 992 return result; 993 } 994 995 //! Generate the address of a constant pool entry. 996 static SDValue 997 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 998 EVT PtrVT = Op.getValueType(); 999 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1000 const Constant *C = CP->getConstVal(); 1001 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); 1002 SDValue Zero = DAG.getConstant(0, PtrVT); 1003 const TargetMachine &TM = DAG.getTarget(); 1004 // FIXME there is no actual debug info here 1005 DebugLoc dl = Op.getDebugLoc(); 1006 1007 if (TM.getRelocationModel() == Reloc::Static) { 1008 if (!ST->usingLargeMem()) { 1009 // Just return the SDValue with the constant pool address in it. 1010 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); 1011 } else { 1012 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); 1013 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); 1014 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); 1015 } 1016 } 1017 1018 llvm_unreachable("LowerConstantPool: Relocation model other than static" 1019 " not supported."); 1020 return SDValue(); 1021 } 1022 1023 //! Alternate entry point for generating the address of a constant pool entry 1024 SDValue 1025 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { 1026 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); 1027 } 1028 1029 static SDValue 1030 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 1031 EVT PtrVT = Op.getValueType(); 1032 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1033 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 1034 SDValue Zero = DAG.getConstant(0, PtrVT); 1035 const TargetMachine &TM = DAG.getTarget(); 1036 // FIXME there is no actual debug info here 1037 DebugLoc dl = Op.getDebugLoc(); 1038 1039 if (TM.getRelocationModel() == Reloc::Static) { 1040 if (!ST->usingLargeMem()) { 1041 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); 1042 } else { 1043 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); 1044 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); 1045 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); 1046 } 1047 } 1048 1049 llvm_unreachable("LowerJumpTable: Relocation model other than static" 1050 " not supported."); 1051 return SDValue(); 1052 } 1053 1054 static SDValue 1055 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 1056 EVT PtrVT = Op.getValueType(); 1057 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 1058 const GlobalValue *GV = GSDN->getGlobal(); 1059 SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), 1060 PtrVT, GSDN->getOffset()); 1061 const TargetMachine &TM = DAG.getTarget(); 1062 SDValue Zero = DAG.getConstant(0, PtrVT); 1063 // FIXME there is no actual debug info here 1064 DebugLoc dl = Op.getDebugLoc(); 1065 1066 if (TM.getRelocationModel() == Reloc::Static) { 1067 if (!ST->usingLargeMem()) { 1068 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); 1069 } else { 1070 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); 1071 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); 1072 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); 1073 } 1074 } else { 1075 report_fatal_error("LowerGlobalAddress: Relocation model other than static" 1076 "not supported."); 1077 /*NOTREACHED*/ 1078 } 1079 1080 return SDValue(); 1081 } 1082 1083 //! Custom lower double precision floating point constants 1084 static SDValue 1085 LowerConstantFP(SDValue Op, SelectionDAG &DAG) { 1086 EVT VT = Op.getValueType(); 1087 // FIXME there is no actual debug info here 1088 DebugLoc dl = Op.getDebugLoc(); 1089 1090 if (VT == MVT::f64) { 1091 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode()); 1092 1093 assert((FP != 0) && 1094 "LowerConstantFP: Node is not ConstantFPSDNode"); 1095 1096 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); 1097 SDValue T = DAG.getConstant(dbits, MVT::i64); 1098 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); 1099 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 1100 DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec)); 1101 } 1102 1103 return SDValue(); 1104 } 1105 1106 SDValue 1107 SPUTargetLowering::LowerFormalArguments(SDValue Chain, 1108 CallingConv::ID CallConv, bool isVarArg, 1109 const SmallVectorImpl<ISD::InputArg> 1110 &Ins, 1111 DebugLoc dl, SelectionDAG &DAG, 1112 SmallVectorImpl<SDValue> &InVals) 1113 const { 1114 1115 MachineFunction &MF = DAG.getMachineFunction(); 1116 MachineFrameInfo *MFI = MF.getFrameInfo(); 1117 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1118 SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); 1119 1120 unsigned ArgOffset = SPUFrameLowering::minStackSize(); 1121 unsigned ArgRegIdx = 0; 1122 unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); 1123 1124 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1125 1126 SmallVector<CCValAssign, 16> ArgLocs; 1127 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1128 getTargetMachine(), ArgLocs, *DAG.getContext()); 1129 // FIXME: allow for other calling conventions 1130 CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); 1131 1132 // Add DAG nodes to load the arguments or copy them out of registers. 1133 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 1134 EVT ObjectVT = Ins[ArgNo].VT; 1135 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 1136 SDValue ArgVal; 1137 CCValAssign &VA = ArgLocs[ArgNo]; 1138 1139 if (VA.isRegLoc()) { 1140 const TargetRegisterClass *ArgRegClass; 1141 1142 switch (ObjectVT.getSimpleVT().SimpleTy) { 1143 default: 1144 report_fatal_error("LowerFormalArguments Unhandled argument type: " + 1145 Twine(ObjectVT.getEVTString())); 1146 case MVT::i8: 1147 ArgRegClass = &SPU::R8CRegClass; 1148 break; 1149 case MVT::i16: 1150 ArgRegClass = &SPU::R16CRegClass; 1151 break; 1152 case MVT::i32: 1153 ArgRegClass = &SPU::R32CRegClass; 1154 break; 1155 case MVT::i64: 1156 ArgRegClass = &SPU::R64CRegClass; 1157 break; 1158 case MVT::i128: 1159 ArgRegClass = &SPU::GPRCRegClass; 1160 break; 1161 case MVT::f32: 1162 ArgRegClass = &SPU::R32FPRegClass; 1163 break; 1164 case MVT::f64: 1165 ArgRegClass = &SPU::R64FPRegClass; 1166 break; 1167 case MVT::v2f64: 1168 case MVT::v4f32: 1169 case MVT::v2i64: 1170 case MVT::v4i32: 1171 case MVT::v8i16: 1172 case MVT::v16i8: 1173 ArgRegClass = &SPU::VECREGRegClass; 1174 break; 1175 } 1176 1177 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); 1178 RegInfo.addLiveIn(VA.getLocReg(), VReg); 1179 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 1180 ++ArgRegIdx; 1181 } else { 1182 // We need to load the argument to a virtual register if we determined 1183 // above that we ran out of physical registers of the appropriate type 1184 // or we're forced to do vararg 1185 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); 1186 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1187 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 1188 false, false, 0); 1189 ArgOffset += StackSlotSize; 1190 } 1191 1192 InVals.push_back(ArgVal); 1193 // Update the chain 1194 Chain = ArgVal.getOperand(0); 1195 } 1196 1197 // vararg handling: 1198 if (isVarArg) { 1199 // FIXME: we should be able to query the argument registers from 1200 // tablegen generated code. 1201 static const unsigned ArgRegs[] = { 1202 SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, 1203 SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, 1204 SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, 1205 SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, 1206 SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, 1207 SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, 1208 SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, 1209 SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, 1210 SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, 1211 SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, 1212 SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 1213 }; 1214 // size of ArgRegs array 1215 unsigned NumArgRegs = 77; 1216 1217 // We will spill (79-3)+1 registers to the stack 1218 SmallVector<SDValue, 79-3+1> MemOps; 1219 1220 // Create the frame slot 1221 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { 1222 FuncInfo->setVarArgsFrameIndex( 1223 MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); 1224 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1225 unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass); 1226 SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); 1227 SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), 1228 false, false, 0); 1229 Chain = Store.getOperand(0); 1230 MemOps.push_back(Store); 1231 1232 // Increment address by stack slot size for the next stored argument 1233 ArgOffset += StackSlotSize; 1234 } 1235 if (!MemOps.empty()) 1236 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1237 &MemOps[0], MemOps.size()); 1238 } 1239 1240 return Chain; 1241 } 1242 1243 /// isLSAAddress - Return the immediate to use if the specified 1244 /// value is representable as a LSA address. 1245 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { 1246 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 1247 if (!C) return 0; 1248 1249 int Addr = C->getZExtValue(); 1250 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 1251 (Addr << 14 >> 14) != Addr) 1252 return 0; // Top 14 bits have to be sext of immediate. 1253 1254 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); 1255 } 1256 1257 SDValue 1258 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, 1259 CallingConv::ID CallConv, bool isVarArg, 1260 bool &isTailCall, 1261 const SmallVectorImpl<ISD::OutputArg> &Outs, 1262 const SmallVectorImpl<SDValue> &OutVals, 1263 const SmallVectorImpl<ISD::InputArg> &Ins, 1264 DebugLoc dl, SelectionDAG &DAG, 1265 SmallVectorImpl<SDValue> &InVals) const { 1266 // CellSPU target does not yet support tail call optimization. 1267 isTailCall = false; 1268 1269 const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); 1270 unsigned NumOps = Outs.size(); 1271 unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); 1272 1273 SmallVector<CCValAssign, 16> ArgLocs; 1274 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1275 getTargetMachine(), ArgLocs, *DAG.getContext()); 1276 // FIXME: allow for other calling conventions 1277 CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); 1278 1279 const unsigned NumArgRegs = ArgLocs.size(); 1280 1281 1282 // Handy pointer type 1283 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1284 1285 // Set up a copy of the stack pointer for use loading and storing any 1286 // arguments that may not fit in the registers available for argument 1287 // passing. 1288 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); 1289 1290 // Figure out which arguments are going to go in registers, and which in 1291 // memory. 1292 unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR] 1293 unsigned ArgRegIdx = 0; 1294 1295 // Keep track of registers passing arguments 1296 std::vector<std::pair<unsigned, SDValue> > RegsToPass; 1297 // And the arguments passed on the stack 1298 SmallVector<SDValue, 8> MemOpChains; 1299 1300 for (; ArgRegIdx != NumOps; ++ArgRegIdx) { 1301 SDValue Arg = OutVals[ArgRegIdx]; 1302 CCValAssign &VA = ArgLocs[ArgRegIdx]; 1303 1304 // PtrOff will be used to store the current argument to the stack if a 1305 // register cannot be found for it. 1306 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 1307 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 1308 1309 switch (Arg.getValueType().getSimpleVT().SimpleTy) { 1310 default: llvm_unreachable("Unexpected ValueType for argument!"); 1311 case MVT::i8: 1312 case MVT::i16: 1313 case MVT::i32: 1314 case MVT::i64: 1315 case MVT::i128: 1316 case MVT::f32: 1317 case MVT::f64: 1318 case MVT::v2i64: 1319 case MVT::v2f64: 1320 case MVT::v4f32: 1321 case MVT::v4i32: 1322 case MVT::v8i16: 1323 case MVT::v16i8: 1324 if (ArgRegIdx != NumArgRegs) { 1325 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1326 } else { 1327 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 1328 MachinePointerInfo(), 1329 false, false, 0)); 1330 ArgOffset += StackSlotSize; 1331 } 1332 break; 1333 } 1334 } 1335 1336 // Accumulate how many bytes are to be pushed on the stack, including the 1337 // linkage area, and parameter passing area. According to the SPU ABI, 1338 // we minimally need space for [LR] and [SP]. 1339 unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize(); 1340 1341 // Insert a call sequence start 1342 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, 1343 true)); 1344 1345 if (!MemOpChains.empty()) { 1346 // Adjust the stack pointer for the stack arguments. 1347 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1348 &MemOpChains[0], MemOpChains.size()); 1349 } 1350 1351 // Build a sequence of copy-to-reg nodes chained together with token chain 1352 // and flag operands which copy the outgoing args into the appropriate regs. 1353 SDValue InFlag; 1354 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1355 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1356 RegsToPass[i].second, InFlag); 1357 InFlag = Chain.getValue(1); 1358 } 1359 1360 SmallVector<SDValue, 8> Ops; 1361 unsigned CallOpc = SPUISD::CALL; 1362 1363 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1364 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1365 // node so that legalize doesn't hack it. 1366 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1367 const GlobalValue *GV = G->getGlobal(); 1368 EVT CalleeVT = Callee.getValueType(); 1369 SDValue Zero = DAG.getConstant(0, PtrVT); 1370 SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); 1371 1372 if (!ST->usingLargeMem()) { 1373 // Turn calls to targets that are defined (i.e., have bodies) into BRSL 1374 // style calls, otherwise, external symbols are BRASL calls. This assumes 1375 // that declared/defined symbols are in the same compilation unit and can 1376 // be reached through PC-relative jumps. 1377 // 1378 // NOTE: 1379 // This may be an unsafe assumption for JIT and really large compilation 1380 // units. 1381 if (GV->isDeclaration()) { 1382 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); 1383 } else { 1384 Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); 1385 } 1386 } else { 1387 // "Large memory" mode: Turn all calls into indirect calls with a X-form 1388 // address pairs: 1389 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); 1390 } 1391 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1392 EVT CalleeVT = Callee.getValueType(); 1393 SDValue Zero = DAG.getConstant(0, PtrVT); 1394 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), 1395 Callee.getValueType()); 1396 1397 if (!ST->usingLargeMem()) { 1398 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); 1399 } else { 1400 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); 1401 } 1402 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { 1403 // If this is an absolute destination address that appears to be a legal 1404 // local store address, use the munged value. 1405 Callee = SDValue(Dest, 0); 1406 } 1407 1408 Ops.push_back(Chain); 1409 Ops.push_back(Callee); 1410 1411 // Add argument registers to the end of the list so that they are known live 1412 // into the call. 1413 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1414 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1415 RegsToPass[i].second.getValueType())); 1416 1417 if (InFlag.getNode()) 1418 Ops.push_back(InFlag); 1419 // Returns a chain and a flag for retval copy to use. 1420 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue), 1421 &Ops[0], Ops.size()); 1422 InFlag = Chain.getValue(1); 1423 1424 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), 1425 DAG.getIntPtrConstant(0, true), InFlag); 1426 if (!Ins.empty()) 1427 InFlag = Chain.getValue(1); 1428 1429 // If the function returns void, just return the chain. 1430 if (Ins.empty()) 1431 return Chain; 1432 1433 // Now handle the return value(s) 1434 SmallVector<CCValAssign, 16> RVLocs; 1435 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1436 getTargetMachine(), RVLocs, *DAG.getContext()); 1437 CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); 1438 1439 1440 // If the call has results, copy the values out of the ret val registers. 1441 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1442 CCValAssign VA = RVLocs[i]; 1443 1444 SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1445 InFlag); 1446 Chain = Val.getValue(1); 1447 InFlag = Val.getValue(2); 1448 InVals.push_back(Val); 1449 } 1450 1451 return Chain; 1452 } 1453 1454 SDValue 1455 SPUTargetLowering::LowerReturn(SDValue Chain, 1456 CallingConv::ID CallConv, bool isVarArg, 1457 const SmallVectorImpl<ISD::OutputArg> &Outs, 1458 const SmallVectorImpl<SDValue> &OutVals, 1459 DebugLoc dl, SelectionDAG &DAG) const { 1460 1461 SmallVector<CCValAssign, 16> RVLocs; 1462 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1463 getTargetMachine(), RVLocs, *DAG.getContext()); 1464 CCInfo.AnalyzeReturn(Outs, RetCC_SPU); 1465 1466 // If this is the first return lowered for this function, add the regs to the 1467 // liveout set for the function. 1468 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1469 for (unsigned i = 0; i != RVLocs.size(); ++i) 1470 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1471 } 1472 1473 SDValue Flag; 1474 1475 // Copy the result values into the output registers. 1476 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1477 CCValAssign &VA = RVLocs[i]; 1478 assert(VA.isRegLoc() && "Can only return in registers!"); 1479 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1480 OutVals[i], Flag); 1481 Flag = Chain.getValue(1); 1482 } 1483 1484 if (Flag.getNode()) 1485 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1486 else 1487 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); 1488 } 1489 1490 1491 //===----------------------------------------------------------------------===// 1492 // Vector related lowering: 1493 //===----------------------------------------------------------------------===// 1494 1495 static ConstantSDNode * 1496 getVecImm(SDNode *N) { 1497 SDValue OpVal(0, 0); 1498 1499 // Check to see if this buildvec has a single non-undef value in its elements. 1500 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1501 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1502 if (OpVal.getNode() == 0) 1503 OpVal = N->getOperand(i); 1504 else if (OpVal != N->getOperand(i)) 1505 return 0; 1506 } 1507 1508 if (OpVal.getNode() != 0) { 1509 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1510 return CN; 1511 } 1512 } 1513 1514 return 0; 1515 } 1516 1517 /// get_vec_i18imm - Test if this vector is a vector filled with the same value 1518 /// and the value fits into an unsigned 18-bit constant, and if so, return the 1519 /// constant 1520 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, 1521 EVT ValueType) { 1522 if (ConstantSDNode *CN = getVecImm(N)) { 1523 uint64_t Value = CN->getZExtValue(); 1524 if (ValueType == MVT::i64) { 1525 uint64_t UValue = CN->getZExtValue(); 1526 uint32_t upper = uint32_t(UValue >> 32); 1527 uint32_t lower = uint32_t(UValue); 1528 if (upper != lower) 1529 return SDValue(); 1530 Value = Value >> 32; 1531 } 1532 if (Value <= 0x3ffff) 1533 return DAG.getTargetConstant(Value, ValueType); 1534 } 1535 1536 return SDValue(); 1537 } 1538 1539 /// get_vec_i16imm - Test if this vector is a vector filled with the same value 1540 /// and the value fits into a signed 16-bit constant, and if so, return the 1541 /// constant 1542 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, 1543 EVT ValueType) { 1544 if (ConstantSDNode *CN = getVecImm(N)) { 1545 int64_t Value = CN->getSExtValue(); 1546 if (ValueType == MVT::i64) { 1547 uint64_t UValue = CN->getZExtValue(); 1548 uint32_t upper = uint32_t(UValue >> 32); 1549 uint32_t lower = uint32_t(UValue); 1550 if (upper != lower) 1551 return SDValue(); 1552 Value = Value >> 32; 1553 } 1554 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) { 1555 return DAG.getTargetConstant(Value, ValueType); 1556 } 1557 } 1558 1559 return SDValue(); 1560 } 1561 1562 /// get_vec_i10imm - Test if this vector is a vector filled with the same value 1563 /// and the value fits into a signed 10-bit constant, and if so, return the 1564 /// constant 1565 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, 1566 EVT ValueType) { 1567 if (ConstantSDNode *CN = getVecImm(N)) { 1568 int64_t Value = CN->getSExtValue(); 1569 if (ValueType == MVT::i64) { 1570 uint64_t UValue = CN->getZExtValue(); 1571 uint32_t upper = uint32_t(UValue >> 32); 1572 uint32_t lower = uint32_t(UValue); 1573 if (upper != lower) 1574 return SDValue(); 1575 Value = Value >> 32; 1576 } 1577 if (isInt<10>(Value)) 1578 return DAG.getTargetConstant(Value, ValueType); 1579 } 1580 1581 return SDValue(); 1582 } 1583 1584 /// get_vec_i8imm - Test if this vector is a vector filled with the same value 1585 /// and the value fits into a signed 8-bit constant, and if so, return the 1586 /// constant. 1587 /// 1588 /// @note: The incoming vector is v16i8 because that's the only way we can load 1589 /// constant vectors. Thus, we test to see if the upper and lower bytes are the 1590 /// same value. 1591 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, 1592 EVT ValueType) { 1593 if (ConstantSDNode *CN = getVecImm(N)) { 1594 int Value = (int) CN->getZExtValue(); 1595 if (ValueType == MVT::i16 1596 && Value <= 0xffff /* truncated from uint64_t */ 1597 && ((short) Value >> 8) == ((short) Value & 0xff)) 1598 return DAG.getTargetConstant(Value & 0xff, ValueType); 1599 else if (ValueType == MVT::i8 1600 && (Value & 0xff) == Value) 1601 return DAG.getTargetConstant(Value, ValueType); 1602 } 1603 1604 return SDValue(); 1605 } 1606 1607 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value 1608 /// and the value fits into a signed 16-bit constant, and if so, return the 1609 /// constant 1610 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, 1611 EVT ValueType) { 1612 if (ConstantSDNode *CN = getVecImm(N)) { 1613 uint64_t Value = CN->getZExtValue(); 1614 if ((ValueType == MVT::i32 1615 && ((unsigned) Value & 0xffff0000) == (unsigned) Value) 1616 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) 1617 return DAG.getTargetConstant(Value >> 16, ValueType); 1618 } 1619 1620 return SDValue(); 1621 } 1622 1623 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors 1624 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { 1625 if (ConstantSDNode *CN = getVecImm(N)) { 1626 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); 1627 } 1628 1629 return SDValue(); 1630 } 1631 1632 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors 1633 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { 1634 if (ConstantSDNode *CN = getVecImm(N)) { 1635 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); 1636 } 1637 1638 return SDValue(); 1639 } 1640 1641 //! Lower a BUILD_VECTOR instruction creatively: 1642 static SDValue 1643 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 1644 EVT VT = Op.getValueType(); 1645 EVT EltVT = VT.getVectorElementType(); 1646 DebugLoc dl = Op.getDebugLoc(); 1647 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 1648 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); 1649 unsigned minSplatBits = EltVT.getSizeInBits(); 1650 1651 if (minSplatBits < 16) 1652 minSplatBits = 16; 1653 1654 APInt APSplatBits, APSplatUndef; 1655 unsigned SplatBitSize; 1656 bool HasAnyUndefs; 1657 1658 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 1659 HasAnyUndefs, minSplatBits) 1660 || minSplatBits < SplatBitSize) 1661 return SDValue(); // Wasn't a constant vector or splat exceeded min 1662 1663 uint64_t SplatBits = APSplatBits.getZExtValue(); 1664 1665 switch (VT.getSimpleVT().SimpleTy) { 1666 default: 1667 report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + 1668 Twine(VT.getEVTString())); 1669 /*NOTREACHED*/ 1670 case MVT::v4f32: { 1671 uint32_t Value32 = uint32_t(SplatBits); 1672 assert(SplatBitSize == 32 1673 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); 1674 // NOTE: pretend the constant is an integer. LLVM won't load FP constants 1675 SDValue T = DAG.getConstant(Value32, MVT::i32); 1676 return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, 1677 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); 1678 break; 1679 } 1680 case MVT::v2f64: { 1681 uint64_t f64val = uint64_t(SplatBits); 1682 assert(SplatBitSize == 64 1683 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); 1684 // NOTE: pretend the constant is an integer. LLVM won't load FP constants 1685 SDValue T = DAG.getConstant(f64val, MVT::i64); 1686 return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, 1687 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); 1688 break; 1689 } 1690 case MVT::v16i8: { 1691 // 8-bit constants have to be expanded to 16-bits 1692 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; 1693 SmallVector<SDValue, 8> Ops; 1694 1695 Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); 1696 return DAG.getNode(ISD::BITCAST, dl, VT, 1697 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); 1698 } 1699 case MVT::v8i16: { 1700 unsigned short Value16 = SplatBits; 1701 SDValue T = DAG.getConstant(Value16, EltVT); 1702 SmallVector<SDValue, 8> Ops; 1703 1704 Ops.assign(8, T); 1705 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); 1706 } 1707 case MVT::v4i32: { 1708 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); 1709 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); 1710 } 1711 case MVT::v2i64: { 1712 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); 1713 } 1714 } 1715 1716 return SDValue(); 1717 } 1718 1719 /*! 1720 */ 1721 SDValue 1722 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, 1723 DebugLoc dl) { 1724 uint32_t upper = uint32_t(SplatVal >> 32); 1725 uint32_t lower = uint32_t(SplatVal); 1726 1727 if (upper == lower) { 1728 // Magic constant that can be matched by IL, ILA, et. al. 1729 SDValue Val = DAG.getTargetConstant(upper, MVT::i32); 1730 return DAG.getNode(ISD::BITCAST, dl, OpVT, 1731 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1732 Val, Val, Val, Val)); 1733 } else { 1734 bool upper_special, lower_special; 1735 1736 // NOTE: This code creates common-case shuffle masks that can be easily 1737 // detected as common expressions. It is not attempting to create highly 1738 // specialized masks to replace any and all 0's, 0xff's and 0x80's. 1739 1740 // Detect if the upper or lower half is a special shuffle mask pattern: 1741 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); 1742 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); 1743 1744 // Both upper and lower are special, lower to a constant pool load: 1745 if (lower_special && upper_special) { 1746 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); 1747 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, 1748 SplatValCN, SplatValCN); 1749 } 1750 1751 SDValue LO32; 1752 SDValue HI32; 1753 SmallVector<SDValue, 16> ShufBytes; 1754 SDValue Result; 1755 1756 // Create lower vector if not a special pattern 1757 if (!lower_special) { 1758 SDValue LO32C = DAG.getConstant(lower, MVT::i32); 1759 LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT, 1760 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1761 LO32C, LO32C, LO32C, LO32C)); 1762 } 1763 1764 // Create upper vector if not a special pattern 1765 if (!upper_special) { 1766 SDValue HI32C = DAG.getConstant(upper, MVT::i32); 1767 HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT, 1768 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1769 HI32C, HI32C, HI32C, HI32C)); 1770 } 1771 1772 // If either upper or lower are special, then the two input operands are 1773 // the same (basically, one of them is a "don't care") 1774 if (lower_special) 1775 LO32 = HI32; 1776 if (upper_special) 1777 HI32 = LO32; 1778 1779 for (int i = 0; i < 4; ++i) { 1780 uint64_t val = 0; 1781 for (int j = 0; j < 4; ++j) { 1782 SDValue V; 1783 bool process_upper, process_lower; 1784 val <<= 8; 1785 process_upper = (upper_special && (i & 1) == 0); 1786 process_lower = (lower_special && (i & 1) == 1); 1787 1788 if (process_upper || process_lower) { 1789 if ((process_upper && upper == 0) 1790 || (process_lower && lower == 0)) 1791 val |= 0x80; 1792 else if ((process_upper && upper == 0xffffffff) 1793 || (process_lower && lower == 0xffffffff)) 1794 val |= 0xc0; 1795 else if ((process_upper && upper == 0x80000000) 1796 || (process_lower && lower == 0x80000000)) 1797 val |= (j == 0 ? 0xe0 : 0x80); 1798 } else 1799 val |= i * 4 + j + ((i & 1) * 16); 1800 } 1801 1802 ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); 1803 } 1804 1805 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, 1806 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1807 &ShufBytes[0], ShufBytes.size())); 1808 } 1809 } 1810 1811 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on 1812 /// which the Cell can operate. The code inspects V3 to ascertain whether the 1813 /// permutation vector, V3, is monotonically increasing with one "exception" 1814 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a 1815 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. 1816 /// In either case, the net result is going to eventually invoke SHUFB to 1817 /// permute/shuffle the bytes from V1 and V2. 1818 /// \note 1819 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate 1820 /// control word for byte/halfword/word insertion. This takes care of a single 1821 /// element move from V2 into V1. 1822 /// \note 1823 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions. 1824 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 1825 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); 1826 SDValue V1 = Op.getOperand(0); 1827 SDValue V2 = Op.getOperand(1); 1828 DebugLoc dl = Op.getDebugLoc(); 1829 1830 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 1831 1832 // If we have a single element being moved from V1 to V2, this can be handled 1833 // using the C*[DX] compute mask instructions, but the vector elements have 1834 // to be monotonically increasing with one exception element, and the source 1835 // slot of the element to move must be the same as the destination. 1836 EVT VecVT = V1.getValueType(); 1837 EVT EltVT = VecVT.getVectorElementType(); 1838 unsigned EltsFromV2 = 0; 1839 unsigned V2EltOffset = 0; 1840 unsigned V2EltIdx0 = 0; 1841 unsigned CurrElt = 0; 1842 unsigned MaxElts = VecVT.getVectorNumElements(); 1843 unsigned PrevElt = 0; 1844 bool monotonic = true; 1845 bool rotate = true; 1846 int rotamt=0; 1847 EVT maskVT; // which of the c?d instructions to use 1848 1849 if (EltVT == MVT::i8) { 1850 V2EltIdx0 = 16; 1851 maskVT = MVT::v16i8; 1852 } else if (EltVT == MVT::i16) { 1853 V2EltIdx0 = 8; 1854 maskVT = MVT::v8i16; 1855 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { 1856 V2EltIdx0 = 4; 1857 maskVT = MVT::v4i32; 1858 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { 1859 V2EltIdx0 = 2; 1860 maskVT = MVT::v2i64; 1861 } else 1862 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); 1863 1864 for (unsigned i = 0; i != MaxElts; ++i) { 1865 if (SVN->getMaskElt(i) < 0) 1866 continue; 1867 1868 unsigned SrcElt = SVN->getMaskElt(i); 1869 1870 if (monotonic) { 1871 if (SrcElt >= V2EltIdx0) { 1872 // TODO: optimize for the monotonic case when several consecutive 1873 // elements are taken form V2. Do we ever get such a case? 1874 if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) 1875 V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); 1876 else 1877 monotonic = false; 1878 ++EltsFromV2; 1879 } else if (CurrElt != SrcElt) { 1880 monotonic = false; 1881 } 1882 1883 ++CurrElt; 1884 } 1885 1886 if (rotate) { 1887 if (PrevElt > 0 && SrcElt < MaxElts) { 1888 if ((PrevElt == SrcElt - 1) 1889 || (PrevElt == MaxElts - 1 && SrcElt == 0)) { 1890 PrevElt = SrcElt; 1891 } else { 1892 rotate = false; 1893 } 1894 } else if (i == 0 || (PrevElt==0 && SrcElt==1)) { 1895 // First time or after a "wrap around" 1896 rotamt = SrcElt-i; 1897 PrevElt = SrcElt; 1898 } else { 1899 // This isn't a rotation, takes elements from vector 2 1900 rotate = false; 1901 } 1902 } 1903 } 1904 1905 if (EltsFromV2 == 1 && monotonic) { 1906 // Compute mask and shuffle 1907 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1908 1909 // As SHUFFLE_MASK becomes a c?d instruction, feed it an address 1910 // R1 ($sp) is used here only as it is guaranteed to have last bits zero 1911 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 1912 DAG.getRegister(SPU::R1, PtrVT), 1913 DAG.getConstant(V2EltOffset, MVT::i32)); 1914 SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, 1915 maskVT, Pointer); 1916 1917 // Use shuffle mask in SHUFB synthetic instruction: 1918 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, 1919 ShufMaskOp); 1920 } else if (rotate) { 1921 if (rotamt < 0) 1922 rotamt +=MaxElts; 1923 rotamt *= EltVT.getSizeInBits()/8; 1924 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), 1925 V1, DAG.getConstant(rotamt, MVT::i16)); 1926 } else { 1927 // Convert the SHUFFLE_VECTOR mask's input element units to the 1928 // actual bytes. 1929 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 1930 1931 SmallVector<SDValue, 16> ResultMask; 1932 for (unsigned i = 0, e = MaxElts; i != e; ++i) { 1933 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); 1934 1935 for (unsigned j = 0; j < BytesPerElement; ++j) 1936 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); 1937 } 1938 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 1939 &ResultMask[0], ResultMask.size()); 1940 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); 1941 } 1942 } 1943 1944 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { 1945 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar 1946 DebugLoc dl = Op.getDebugLoc(); 1947 1948 if (Op0.getNode()->getOpcode() == ISD::Constant) { 1949 // For a constant, build the appropriate constant vector, which will 1950 // eventually simplify to a vector register load. 1951 1952 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode()); 1953 SmallVector<SDValue, 16> ConstVecValues; 1954 EVT VT; 1955 size_t n_copies; 1956 1957 // Create a constant vector: 1958 switch (Op.getValueType().getSimpleVT().SimpleTy) { 1959 default: llvm_unreachable("Unexpected constant value type in " 1960 "LowerSCALAR_TO_VECTOR"); 1961 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; 1962 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; 1963 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; 1964 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; 1965 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; 1966 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; 1967 } 1968 1969 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); 1970 for (size_t j = 0; j < n_copies; ++j) 1971 ConstVecValues.push_back(CValue); 1972 1973 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), 1974 &ConstVecValues[0], ConstVecValues.size()); 1975 } else { 1976 // Otherwise, copy the value from one register to another: 1977 switch (Op0.getValueType().getSimpleVT().SimpleTy) { 1978 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); 1979 case MVT::i8: 1980 case MVT::i16: 1981 case MVT::i32: 1982 case MVT::i64: 1983 case MVT::f32: 1984 case MVT::f64: 1985 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); 1986 } 1987 } 1988 1989 return SDValue(); 1990 } 1991 1992 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 1993 EVT VT = Op.getValueType(); 1994 SDValue N = Op.getOperand(0); 1995 SDValue Elt = Op.getOperand(1); 1996 DebugLoc dl = Op.getDebugLoc(); 1997 SDValue retval; 1998 1999 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2000 // Constant argument: 2001 int EltNo = (int) C->getZExtValue(); 2002 2003 // sanity checks: 2004 if (VT == MVT::i8 && EltNo >= 16) 2005 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); 2006 else if (VT == MVT::i16 && EltNo >= 8) 2007 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); 2008 else if (VT == MVT::i32 && EltNo >= 4) 2009 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); 2010 else if (VT == MVT::i64 && EltNo >= 2) 2011 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); 2012 2013 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { 2014 // i32 and i64: Element 0 is the preferred slot 2015 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); 2016 } 2017 2018 // Need to generate shuffle mask and extract: 2019 int prefslot_begin = -1, prefslot_end = -1; 2020 int elt_byte = EltNo * VT.getSizeInBits() / 8; 2021 2022 switch (VT.getSimpleVT().SimpleTy) { 2023 default: 2024 assert(false && "Invalid value type!"); 2025 case MVT::i8: { 2026 prefslot_begin = prefslot_end = 3; 2027 break; 2028 } 2029 case MVT::i16: { 2030 prefslot_begin = 2; prefslot_end = 3; 2031 break; 2032 } 2033 case MVT::i32: 2034 case MVT::f32: { 2035 prefslot_begin = 0; prefslot_end = 3; 2036 break; 2037 } 2038 case MVT::i64: 2039 case MVT::f64: { 2040 prefslot_begin = 0; prefslot_end = 7; 2041 break; 2042 } 2043 } 2044 2045 assert(prefslot_begin != -1 && prefslot_end != -1 && 2046 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); 2047 2048 unsigned int ShufBytes[16] = { 2049 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 2050 }; 2051 for (int i = 0; i < 16; ++i) { 2052 // zero fill uppper part of preferred slot, don't care about the 2053 // other slots: 2054 unsigned int mask_val; 2055 if (i <= prefslot_end) { 2056 mask_val = 2057 ((i < prefslot_begin) 2058 ? 0x80 2059 : elt_byte + (i - prefslot_begin)); 2060 2061 ShufBytes[i] = mask_val; 2062 } else 2063 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; 2064 } 2065 2066 SDValue ShufMask[4]; 2067 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { 2068 unsigned bidx = i * 4; 2069 unsigned int bits = ((ShufBytes[bidx] << 24) | 2070 (ShufBytes[bidx+1] << 16) | 2071 (ShufBytes[bidx+2] << 8) | 2072 ShufBytes[bidx+3]); 2073 ShufMask[i] = DAG.getConstant(bits, MVT::i32); 2074 } 2075 2076 SDValue ShufMaskVec = 2077 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2078 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); 2079 2080 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 2081 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), 2082 N, N, ShufMaskVec)); 2083 } else { 2084 // Variable index: Rotate the requested element into slot 0, then replicate 2085 // slot 0 across the vector 2086 EVT VecVT = N.getValueType(); 2087 if (!VecVT.isSimple() || !VecVT.isVector()) { 2088 report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" 2089 "vector type!"); 2090 } 2091 2092 // Make life easier by making sure the index is zero-extended to i32 2093 if (Elt.getValueType() != MVT::i32) 2094 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); 2095 2096 // Scale the index to a bit/byte shift quantity 2097 APInt scaleFactor = 2098 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); 2099 unsigned scaleShift = scaleFactor.logBase2(); 2100 SDValue vecShift; 2101 2102 if (scaleShift > 0) { 2103 // Scale the shift factor: 2104 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, 2105 DAG.getConstant(scaleShift, MVT::i32)); 2106 } 2107 2108 vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt); 2109 2110 // Replicate the bytes starting at byte 0 across the entire vector (for 2111 // consistency with the notion of a unified register set) 2112 SDValue replicate; 2113 2114 switch (VT.getSimpleVT().SimpleTy) { 2115 default: 2116 report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" 2117 "type"); 2118 /*NOTREACHED*/ 2119 case MVT::i8: { 2120 SDValue factor = DAG.getConstant(0x00000000, MVT::i32); 2121 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2122 factor, factor, factor, factor); 2123 break; 2124 } 2125 case MVT::i16: { 2126 SDValue factor = DAG.getConstant(0x00010001, MVT::i32); 2127 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2128 factor, factor, factor, factor); 2129 break; 2130 } 2131 case MVT::i32: 2132 case MVT::f32: { 2133 SDValue factor = DAG.getConstant(0x00010203, MVT::i32); 2134 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2135 factor, factor, factor, factor); 2136 break; 2137 } 2138 case MVT::i64: 2139 case MVT::f64: { 2140 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); 2141 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); 2142 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2143 loFactor, hiFactor, loFactor, hiFactor); 2144 break; 2145 } 2146 } 2147 2148 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 2149 DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2150 vecShift, vecShift, replicate)); 2151 } 2152 2153 return retval; 2154 } 2155 2156 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2157 SDValue VecOp = Op.getOperand(0); 2158 SDValue ValOp = Op.getOperand(1); 2159 SDValue IdxOp = Op.getOperand(2); 2160 DebugLoc dl = Op.getDebugLoc(); 2161 EVT VT = Op.getValueType(); 2162 EVT eltVT = ValOp.getValueType(); 2163 2164 // use 0 when the lane to insert to is 'undef' 2165 int64_t Offset=0; 2166 if (IdxOp.getOpcode() != ISD::UNDEF) { 2167 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); 2168 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); 2169 Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; 2170 } 2171 2172 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2173 // Use $sp ($1) because it's always 16-byte aligned and it's available: 2174 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 2175 DAG.getRegister(SPU::R1, PtrVT), 2176 DAG.getConstant(Offset, PtrVT)); 2177 // widen the mask when dealing with half vectors 2178 EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), 2179 128/ VT.getVectorElementType().getSizeInBits()); 2180 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); 2181 2182 SDValue result = 2183 DAG.getNode(SPUISD::SHUFB, dl, VT, 2184 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), 2185 VecOp, 2186 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask)); 2187 2188 return result; 2189 } 2190 2191 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, 2192 const TargetLowering &TLI) 2193 { 2194 SDValue N0 = Op.getOperand(0); // Everything has at least one operand 2195 DebugLoc dl = Op.getDebugLoc(); 2196 EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType()); 2197 2198 assert(Op.getValueType() == MVT::i8); 2199 switch (Opc) { 2200 default: 2201 llvm_unreachable("Unhandled i8 math operator"); 2202 /*NOTREACHED*/ 2203 break; 2204 case ISD::ADD: { 2205 // 8-bit addition: Promote the arguments up to 16-bits and truncate 2206 // the result: 2207 SDValue N1 = Op.getOperand(1); 2208 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2209 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2210 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2211 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2212 2213 } 2214 2215 case ISD::SUB: { 2216 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate 2217 // the result: 2218 SDValue N1 = Op.getOperand(1); 2219 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2220 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2221 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2222 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2223 } 2224 case ISD::ROTR: 2225 case ISD::ROTL: { 2226 SDValue N1 = Op.getOperand(1); 2227 EVT N1VT = N1.getValueType(); 2228 2229 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2230 if (!N1VT.bitsEq(ShiftVT)) { 2231 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) 2232 ? ISD::ZERO_EXTEND 2233 : ISD::TRUNCATE; 2234 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2235 } 2236 2237 // Replicate lower 8-bits into upper 8: 2238 SDValue ExpandArg = 2239 DAG.getNode(ISD::OR, dl, MVT::i16, N0, 2240 DAG.getNode(ISD::SHL, dl, MVT::i16, 2241 N0, DAG.getConstant(8, MVT::i32))); 2242 2243 // Truncate back down to i8 2244 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2245 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); 2246 } 2247 case ISD::SRL: 2248 case ISD::SHL: { 2249 SDValue N1 = Op.getOperand(1); 2250 EVT N1VT = N1.getValueType(); 2251 2252 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2253 if (!N1VT.bitsEq(ShiftVT)) { 2254 unsigned N1Opc = ISD::ZERO_EXTEND; 2255 2256 if (N1.getValueType().bitsGT(ShiftVT)) 2257 N1Opc = ISD::TRUNCATE; 2258 2259 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2260 } 2261 2262 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2263 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2264 } 2265 case ISD::SRA: { 2266 SDValue N1 = Op.getOperand(1); 2267 EVT N1VT = N1.getValueType(); 2268 2269 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2270 if (!N1VT.bitsEq(ShiftVT)) { 2271 unsigned N1Opc = ISD::SIGN_EXTEND; 2272 2273 if (N1VT.bitsGT(ShiftVT)) 2274 N1Opc = ISD::TRUNCATE; 2275 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2276 } 2277 2278 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2279 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2280 } 2281 case ISD::MUL: { 2282 SDValue N1 = Op.getOperand(1); 2283 2284 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2285 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2286 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2287 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2288 break; 2289 } 2290 } 2291 2292 return SDValue(); 2293 } 2294 2295 //! Lower byte immediate operations for v16i8 vectors: 2296 static SDValue 2297 LowerByteImmed(SDValue Op, SelectionDAG &DAG) { 2298 SDValue ConstVec; 2299 SDValue Arg; 2300 EVT VT = Op.getValueType(); 2301 DebugLoc dl = Op.getDebugLoc(); 2302 2303 ConstVec = Op.getOperand(0); 2304 Arg = Op.getOperand(1); 2305 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { 2306 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { 2307 ConstVec = ConstVec.getOperand(0); 2308 } else { 2309 ConstVec = Op.getOperand(1); 2310 Arg = Op.getOperand(0); 2311 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { 2312 ConstVec = ConstVec.getOperand(0); 2313 } 2314 } 2315 } 2316 2317 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { 2318 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode()); 2319 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); 2320 2321 APInt APSplatBits, APSplatUndef; 2322 unsigned SplatBitSize; 2323 bool HasAnyUndefs; 2324 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); 2325 2326 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 2327 HasAnyUndefs, minSplatBits) 2328 && minSplatBits <= SplatBitSize) { 2329 uint64_t SplatBits = APSplatBits.getZExtValue(); 2330 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); 2331 2332 SmallVector<SDValue, 16> tcVec; 2333 tcVec.assign(16, tc); 2334 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, 2335 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); 2336 } 2337 } 2338 2339 // These operations (AND, OR, XOR) are legal, they just couldn't be custom 2340 // lowered. Return the operation, rather than a null SDValue. 2341 return Op; 2342 } 2343 2344 //! Custom lowering for CTPOP (count population) 2345 /*! 2346 Custom lowering code that counts the number ones in the input 2347 operand. SPU has such an instruction, but it counts the number of 2348 ones per byte, which then have to be accumulated. 2349 */ 2350 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { 2351 EVT VT = Op.getValueType(); 2352 EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 2353 VT, (128 / VT.getSizeInBits())); 2354 DebugLoc dl = Op.getDebugLoc(); 2355 2356 switch (VT.getSimpleVT().SimpleTy) { 2357 default: 2358 assert(false && "Invalid value type!"); 2359 case MVT::i8: { 2360 SDValue N = Op.getOperand(0); 2361 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2362 2363 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2364 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2365 2366 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); 2367 } 2368 2369 case MVT::i16: { 2370 MachineFunction &MF = DAG.getMachineFunction(); 2371 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2372 2373 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); 2374 2375 SDValue N = Op.getOperand(0); 2376 SDValue Elt0 = DAG.getConstant(0, MVT::i16); 2377 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); 2378 SDValue Shift1 = DAG.getConstant(8, MVT::i32); 2379 2380 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2381 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2382 2383 // CNTB_result becomes the chain to which all of the virtual registers 2384 // CNTB_reg, SUM1_reg become associated: 2385 SDValue CNTB_result = 2386 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); 2387 2388 SDValue CNTB_rescopy = 2389 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2390 2391 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); 2392 2393 return DAG.getNode(ISD::AND, dl, MVT::i16, 2394 DAG.getNode(ISD::ADD, dl, MVT::i16, 2395 DAG.getNode(ISD::SRL, dl, MVT::i16, 2396 Tmp1, Shift1), 2397 Tmp1), 2398 Mask0); 2399 } 2400 2401 case MVT::i32: { 2402 MachineFunction &MF = DAG.getMachineFunction(); 2403 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2404 2405 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2406 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2407 2408 SDValue N = Op.getOperand(0); 2409 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2410 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); 2411 SDValue Shift1 = DAG.getConstant(16, MVT::i32); 2412 SDValue Shift2 = DAG.getConstant(8, MVT::i32); 2413 2414 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2415 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2416 2417 // CNTB_result becomes the chain to which all of the virtual registers 2418 // CNTB_reg, SUM1_reg become associated: 2419 SDValue CNTB_result = 2420 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); 2421 2422 SDValue CNTB_rescopy = 2423 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2424 2425 SDValue Comp1 = 2426 DAG.getNode(ISD::SRL, dl, MVT::i32, 2427 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), 2428 Shift1); 2429 2430 SDValue Sum1 = 2431 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, 2432 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); 2433 2434 SDValue Sum1_rescopy = 2435 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); 2436 2437 SDValue Comp2 = 2438 DAG.getNode(ISD::SRL, dl, MVT::i32, 2439 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), 2440 Shift2); 2441 SDValue Sum2 = 2442 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2, 2443 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32)); 2444 2445 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0); 2446 } 2447 2448 case MVT::i64: 2449 break; 2450 } 2451 2452 return SDValue(); 2453 } 2454 2455 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 2456 /*! 2457 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. 2458 All conversions to i64 are expanded to a libcall. 2459 */ 2460 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 2461 const SPUTargetLowering &TLI) { 2462 EVT OpVT = Op.getValueType(); 2463 SDValue Op0 = Op.getOperand(0); 2464 EVT Op0VT = Op0.getValueType(); 2465 2466 if ((OpVT == MVT::i32 && Op0VT == MVT::f64) 2467 || OpVT == MVT::i64) { 2468 // Convert f32 / f64 to i32 / i64 via libcall. 2469 RTLIB::Libcall LC = 2470 (Op.getOpcode() == ISD::FP_TO_SINT) 2471 ? RTLIB::getFPTOSINT(Op0VT, OpVT) 2472 : RTLIB::getFPTOUINT(Op0VT, OpVT); 2473 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); 2474 SDValue Dummy; 2475 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); 2476 } 2477 2478 return Op; 2479 } 2480 2481 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 2482 /*! 2483 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. 2484 All conversions from i64 are expanded to a libcall. 2485 */ 2486 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, 2487 const SPUTargetLowering &TLI) { 2488 EVT OpVT = Op.getValueType(); 2489 SDValue Op0 = Op.getOperand(0); 2490 EVT Op0VT = Op0.getValueType(); 2491 2492 if ((OpVT == MVT::f64 && Op0VT == MVT::i32) 2493 || Op0VT == MVT::i64) { 2494 // Convert i32, i64 to f64 via libcall: 2495 RTLIB::Libcall LC = 2496 (Op.getOpcode() == ISD::SINT_TO_FP) 2497 ? RTLIB::getSINTTOFP(Op0VT, OpVT) 2498 : RTLIB::getUINTTOFP(Op0VT, OpVT); 2499 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); 2500 SDValue Dummy; 2501 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); 2502 } 2503 2504 return Op; 2505 } 2506 2507 //! Lower ISD::SETCC 2508 /*! 2509 This handles MVT::f64 (double floating point) condition lowering 2510 */ 2511 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, 2512 const TargetLowering &TLI) { 2513 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); 2514 DebugLoc dl = Op.getDebugLoc(); 2515 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); 2516 2517 SDValue lhs = Op.getOperand(0); 2518 SDValue rhs = Op.getOperand(1); 2519 EVT lhsVT = lhs.getValueType(); 2520 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); 2521 2522 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); 2523 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2524 EVT IntVT(MVT::i64); 2525 2526 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently 2527 // selected to a NOP: 2528 SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs); 2529 SDValue lhsHi32 = 2530 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2531 DAG.getNode(ISD::SRL, dl, IntVT, 2532 i64lhs, DAG.getConstant(32, MVT::i32))); 2533 SDValue lhsHi32abs = 2534 DAG.getNode(ISD::AND, dl, MVT::i32, 2535 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); 2536 SDValue lhsLo32 = 2537 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); 2538 2539 // SETO and SETUO only use the lhs operand: 2540 if (CC->get() == ISD::SETO) { 2541 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of 2542 // SETUO 2543 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2544 return DAG.getNode(ISD::XOR, dl, ccResultVT, 2545 DAG.getSetCC(dl, ccResultVT, 2546 lhs, DAG.getConstantFP(0.0, lhsVT), 2547 ISD::SETUO), 2548 DAG.getConstant(ccResultAllOnes, ccResultVT)); 2549 } else if (CC->get() == ISD::SETUO) { 2550 // Evaluates to true if Op0 is [SQ]NaN 2551 return DAG.getNode(ISD::AND, dl, ccResultVT, 2552 DAG.getSetCC(dl, ccResultVT, 2553 lhsHi32abs, 2554 DAG.getConstant(0x7ff00000, MVT::i32), 2555 ISD::SETGE), 2556 DAG.getSetCC(dl, ccResultVT, 2557 lhsLo32, 2558 DAG.getConstant(0, MVT::i32), 2559 ISD::SETGT)); 2560 } 2561 2562 SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs); 2563 SDValue rhsHi32 = 2564 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2565 DAG.getNode(ISD::SRL, dl, IntVT, 2566 i64rhs, DAG.getConstant(32, MVT::i32))); 2567 2568 // If a value is negative, subtract from the sign magnitude constant: 2569 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); 2570 2571 // Convert the sign-magnitude representation into 2's complement: 2572 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2573 lhsHi32, DAG.getConstant(31, MVT::i32)); 2574 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); 2575 SDValue lhsSelect = 2576 DAG.getNode(ISD::SELECT, dl, IntVT, 2577 lhsSelectMask, lhsSignMag2TC, i64lhs); 2578 2579 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2580 rhsHi32, DAG.getConstant(31, MVT::i32)); 2581 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); 2582 SDValue rhsSelect = 2583 DAG.getNode(ISD::SELECT, dl, IntVT, 2584 rhsSelectMask, rhsSignMag2TC, i64rhs); 2585 2586 unsigned compareOp; 2587 2588 switch (CC->get()) { 2589 case ISD::SETOEQ: 2590 case ISD::SETUEQ: 2591 compareOp = ISD::SETEQ; break; 2592 case ISD::SETOGT: 2593 case ISD::SETUGT: 2594 compareOp = ISD::SETGT; break; 2595 case ISD::SETOGE: 2596 case ISD::SETUGE: 2597 compareOp = ISD::SETGE; break; 2598 case ISD::SETOLT: 2599 case ISD::SETULT: 2600 compareOp = ISD::SETLT; break; 2601 case ISD::SETOLE: 2602 case ISD::SETULE: 2603 compareOp = ISD::SETLE; break; 2604 case ISD::SETUNE: 2605 case ISD::SETONE: 2606 compareOp = ISD::SETNE; break; 2607 default: 2608 report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); 2609 } 2610 2611 SDValue result = 2612 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, 2613 (ISD::CondCode) compareOp); 2614 2615 if ((CC->get() & 0x8) == 0) { 2616 // Ordered comparison: 2617 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT, 2618 lhs, DAG.getConstantFP(0.0, MVT::f64), 2619 ISD::SETO); 2620 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT, 2621 rhs, DAG.getConstantFP(0.0, MVT::f64), 2622 ISD::SETO); 2623 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN); 2624 2625 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result); 2626 } 2627 2628 return result; 2629 } 2630 2631 //! Lower ISD::SELECT_CC 2632 /*! 2633 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the 2634 SELB instruction. 2635 2636 \note Need to revisit this in the future: if the code path through the true 2637 and false value computations is longer than the latency of a branch (6 2638 cycles), then it would be more advantageous to branch and insert a new basic 2639 block and branch on the condition. However, this code does not make that 2640 assumption, given the simplisitc uses so far. 2641 */ 2642 2643 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, 2644 const TargetLowering &TLI) { 2645 EVT VT = Op.getValueType(); 2646 SDValue lhs = Op.getOperand(0); 2647 SDValue rhs = Op.getOperand(1); 2648 SDValue trueval = Op.getOperand(2); 2649 SDValue falseval = Op.getOperand(3); 2650 SDValue condition = Op.getOperand(4); 2651 DebugLoc dl = Op.getDebugLoc(); 2652 2653 // NOTE: SELB's arguments: $rA, $rB, $mask 2654 // 2655 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB 2656 // where bits in $mask are 1. CCond will be inverted, having 1s where the 2657 // condition was true and 0s where the condition was false. Hence, the 2658 // arguments to SELB get reversed. 2659 2660 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's 2661 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up 2662 // with another "cannot select select_cc" assert: 2663 2664 SDValue compare = DAG.getNode(ISD::SETCC, dl, 2665 TLI.getSetCCResultType(Op.getValueType()), 2666 lhs, rhs, condition); 2667 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare); 2668 } 2669 2670 //! Custom lower ISD::TRUNCATE 2671 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) 2672 { 2673 // Type to truncate to 2674 EVT VT = Op.getValueType(); 2675 MVT simpleVT = VT.getSimpleVT(); 2676 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), 2677 VT, (128 / VT.getSizeInBits())); 2678 DebugLoc dl = Op.getDebugLoc(); 2679 2680 // Type to truncate from 2681 SDValue Op0 = Op.getOperand(0); 2682 EVT Op0VT = Op0.getValueType(); 2683 2684 if (Op0VT == MVT::i128 && simpleVT == MVT::i64) { 2685 // Create shuffle mask, least significant doubleword of quadword 2686 unsigned maskHigh = 0x08090a0b; 2687 unsigned maskLow = 0x0c0d0e0f; 2688 // Use a shuffle to perform the truncation 2689 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2690 DAG.getConstant(maskHigh, MVT::i32), 2691 DAG.getConstant(maskLow, MVT::i32), 2692 DAG.getConstant(maskHigh, MVT::i32), 2693 DAG.getConstant(maskLow, MVT::i32)); 2694 2695 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2696 Op0, Op0, shufMask); 2697 2698 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); 2699 } 2700 2701 return SDValue(); // Leave the truncate unmolested 2702 } 2703 2704 /*! 2705 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic 2706 * algorithm is to duplicate the sign bit using rotmai to generate at 2707 * least one byte full of sign bits. Then propagate the "sign-byte" into 2708 * the leftmost words and the i64/i32 into the rightmost words using shufb. 2709 * 2710 * @param Op The sext operand 2711 * @param DAG The current DAG 2712 * @return The SDValue with the entire instruction sequence 2713 */ 2714 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) 2715 { 2716 DebugLoc dl = Op.getDebugLoc(); 2717 2718 // Type to extend to 2719 MVT OpVT = Op.getValueType().getSimpleVT(); 2720 2721 // Type to extend from 2722 SDValue Op0 = Op.getOperand(0); 2723 MVT Op0VT = Op0.getValueType().getSimpleVT(); 2724 2725 // extend i8 & i16 via i32 2726 if (Op0VT == MVT::i8 || Op0VT == MVT::i16) { 2727 Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0); 2728 Op0VT = MVT::i32; 2729 } 2730 2731 // The type to extend to needs to be a i128 and 2732 // the type to extend from needs to be i64 or i32. 2733 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && 2734 "LowerSIGN_EXTEND: input and/or output operand have wrong size"); 2735 (void)OpVT; 2736 2737 // Create shuffle mask 2738 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 2739 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 2740 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 2741 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2742 DAG.getConstant(mask1, MVT::i32), 2743 DAG.getConstant(mask1, MVT::i32), 2744 DAG.getConstant(mask2, MVT::i32), 2745 DAG.getConstant(mask3, MVT::i32)); 2746 2747 // Word wise arithmetic right shift to generate at least one byte 2748 // that contains sign bits. 2749 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; 2750 SDValue sraVal = DAG.getNode(ISD::SRA, 2751 dl, 2752 mvt, 2753 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), 2754 DAG.getConstant(31, MVT::i32)); 2755 2756 // reinterpret as a i128 (SHUFB requires it). This gets lowered away. 2757 SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 2758 dl, Op0VT, Op0, 2759 DAG.getTargetConstant( 2760 SPU::GPRCRegClass.getID(), 2761 MVT::i32)), 0); 2762 // Shuffle bytes - Copy the sign bits into the upper 64 bits 2763 // and the input value into the lower 64 bits. 2764 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, 2765 extended, sraVal, shufMask); 2766 return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle); 2767 } 2768 2769 //! Custom (target-specific) lowering entry point 2770 /*! 2771 This is where LLVM's DAG selection process calls to do target-specific 2772 lowering of nodes. 2773 */ 2774 SDValue 2775 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 2776 { 2777 unsigned Opc = (unsigned) Op.getOpcode(); 2778 EVT VT = Op.getValueType(); 2779 2780 switch (Opc) { 2781 default: { 2782 #ifndef NDEBUG 2783 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; 2784 errs() << "Op.getOpcode() = " << Opc << "\n"; 2785 errs() << "*Op.getNode():\n"; 2786 Op.getNode()->dump(); 2787 #endif 2788 llvm_unreachable(0); 2789 } 2790 case ISD::LOAD: 2791 case ISD::EXTLOAD: 2792 case ISD::SEXTLOAD: 2793 case ISD::ZEXTLOAD: 2794 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); 2795 case ISD::STORE: 2796 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); 2797 case ISD::ConstantPool: 2798 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); 2799 case ISD::GlobalAddress: 2800 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); 2801 case ISD::JumpTable: 2802 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); 2803 case ISD::ConstantFP: 2804 return LowerConstantFP(Op, DAG); 2805 2806 // i8, i64 math ops: 2807 case ISD::ADD: 2808 case ISD::SUB: 2809 case ISD::ROTR: 2810 case ISD::ROTL: 2811 case ISD::SRL: 2812 case ISD::SHL: 2813 case ISD::SRA: { 2814 if (VT == MVT::i8) 2815 return LowerI8Math(Op, DAG, Opc, *this); 2816 break; 2817 } 2818 2819 case ISD::FP_TO_SINT: 2820 case ISD::FP_TO_UINT: 2821 return LowerFP_TO_INT(Op, DAG, *this); 2822 2823 case ISD::SINT_TO_FP: 2824 case ISD::UINT_TO_FP: 2825 return LowerINT_TO_FP(Op, DAG, *this); 2826 2827 // Vector-related lowering. 2828 case ISD::BUILD_VECTOR: 2829 return LowerBUILD_VECTOR(Op, DAG); 2830 case ISD::SCALAR_TO_VECTOR: 2831 return LowerSCALAR_TO_VECTOR(Op, DAG); 2832 case ISD::VECTOR_SHUFFLE: 2833 return LowerVECTOR_SHUFFLE(Op, DAG); 2834 case ISD::EXTRACT_VECTOR_ELT: 2835 return LowerEXTRACT_VECTOR_ELT(Op, DAG); 2836 case ISD::INSERT_VECTOR_ELT: 2837 return LowerINSERT_VECTOR_ELT(Op, DAG); 2838 2839 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: 2840 case ISD::AND: 2841 case ISD::OR: 2842 case ISD::XOR: 2843 return LowerByteImmed(Op, DAG); 2844 2845 // Vector and i8 multiply: 2846 case ISD::MUL: 2847 if (VT == MVT::i8) 2848 return LowerI8Math(Op, DAG, Opc, *this); 2849 2850 case ISD::CTPOP: 2851 return LowerCTPOP(Op, DAG); 2852 2853 case ISD::SELECT_CC: 2854 return LowerSELECT_CC(Op, DAG, *this); 2855 2856 case ISD::SETCC: 2857 return LowerSETCC(Op, DAG, *this); 2858 2859 case ISD::TRUNCATE: 2860 return LowerTRUNCATE(Op, DAG); 2861 2862 case ISD::SIGN_EXTEND: 2863 return LowerSIGN_EXTEND(Op, DAG); 2864 } 2865 2866 return SDValue(); 2867 } 2868 2869 void SPUTargetLowering::ReplaceNodeResults(SDNode *N, 2870 SmallVectorImpl<SDValue>&Results, 2871 SelectionDAG &DAG) const 2872 { 2873 #if 0 2874 unsigned Opc = (unsigned) N->getOpcode(); 2875 EVT OpVT = N->getValueType(0); 2876 2877 switch (Opc) { 2878 default: { 2879 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; 2880 errs() << "Op.getOpcode() = " << Opc << "\n"; 2881 errs() << "*Op.getNode():\n"; 2882 N->dump(); 2883 abort(); 2884 /*NOTREACHED*/ 2885 } 2886 } 2887 #endif 2888 2889 /* Otherwise, return unchanged */ 2890 } 2891 2892 //===----------------------------------------------------------------------===// 2893 // Target Optimization Hooks 2894 //===----------------------------------------------------------------------===// 2895 2896 SDValue 2897 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const 2898 { 2899 #if 0 2900 TargetMachine &TM = getTargetMachine(); 2901 #endif 2902 const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); 2903 SelectionDAG &DAG = DCI.DAG; 2904 SDValue Op0 = N->getOperand(0); // everything has at least one operand 2905 EVT NodeVT = N->getValueType(0); // The node's value type 2906 EVT Op0VT = Op0.getValueType(); // The first operand's result 2907 SDValue Result; // Initially, empty result 2908 DebugLoc dl = N->getDebugLoc(); 2909 2910 switch (N->getOpcode()) { 2911 default: break; 2912 case ISD::ADD: { 2913 SDValue Op1 = N->getOperand(1); 2914 2915 if (Op0.getOpcode() == SPUISD::IndirectAddr 2916 || Op1.getOpcode() == SPUISD::IndirectAddr) { 2917 // Normalize the operands to reduce repeated code 2918 SDValue IndirectArg = Op0, AddArg = Op1; 2919 2920 if (Op1.getOpcode() == SPUISD::IndirectAddr) { 2921 IndirectArg = Op1; 2922 AddArg = Op0; 2923 } 2924 2925 if (isa<ConstantSDNode>(AddArg)) { 2926 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); 2927 SDValue IndOp1 = IndirectArg.getOperand(1); 2928 2929 if (CN0->isNullValue()) { 2930 // (add (SPUindirect <arg>, <arg>), 0) -> 2931 // (SPUindirect <arg>, <arg>) 2932 2933 #if !defined(NDEBUG) 2934 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2935 errs() << "\n" 2936 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" 2937 << "With: (SPUindirect <arg>, <arg>)\n"; 2938 } 2939 #endif 2940 2941 return IndirectArg; 2942 } else if (isa<ConstantSDNode>(IndOp1)) { 2943 // (add (SPUindirect <arg>, <const>), <const>) -> 2944 // (SPUindirect <arg>, <const + const>) 2945 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1); 2946 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); 2947 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); 2948 2949 #if !defined(NDEBUG) 2950 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2951 errs() << "\n" 2952 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() 2953 << "), " << CN0->getSExtValue() << ")\n" 2954 << "With: (SPUindirect <arg>, " 2955 << combinedConst << ")\n"; 2956 } 2957 #endif 2958 2959 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 2960 IndirectArg, combinedValue); 2961 } 2962 } 2963 } 2964 break; 2965 } 2966 case ISD::SIGN_EXTEND: 2967 case ISD::ZERO_EXTEND: 2968 case ISD::ANY_EXTEND: { 2969 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { 2970 // (any_extend (SPUextract_elt0 <arg>)) -> 2971 // (SPUextract_elt0 <arg>) 2972 // Types must match, however... 2973 #if !defined(NDEBUG) 2974 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2975 errs() << "\nReplace: "; 2976 N->dump(&DAG); 2977 errs() << "\nWith: "; 2978 Op0.getNode()->dump(&DAG); 2979 errs() << "\n"; 2980 } 2981 #endif 2982 2983 return Op0; 2984 } 2985 break; 2986 } 2987 case SPUISD::IndirectAddr: { 2988 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { 2989 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2990 if (CN != 0 && CN->isNullValue()) { 2991 // (SPUindirect (SPUaform <addr>, 0), 0) -> 2992 // (SPUaform <addr>, 0) 2993 2994 DEBUG(errs() << "Replace: "); 2995 DEBUG(N->dump(&DAG)); 2996 DEBUG(errs() << "\nWith: "); 2997 DEBUG(Op0.getNode()->dump(&DAG)); 2998 DEBUG(errs() << "\n"); 2999 3000 return Op0; 3001 } 3002 } else if (Op0.getOpcode() == ISD::ADD) { 3003 SDValue Op1 = N->getOperand(1); 3004 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { 3005 // (SPUindirect (add <arg>, <arg>), 0) -> 3006 // (SPUindirect <arg>, <arg>) 3007 if (CN1->isNullValue()) { 3008 3009 #if !defined(NDEBUG) 3010 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 3011 errs() << "\n" 3012 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" 3013 << "With: (SPUindirect <arg>, <arg>)\n"; 3014 } 3015 #endif 3016 3017 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 3018 Op0.getOperand(0), Op0.getOperand(1)); 3019 } 3020 } 3021 } 3022 break; 3023 } 3024 case SPUISD::SHL_BITS: 3025 case SPUISD::SHL_BYTES: 3026 case SPUISD::ROTBYTES_LEFT: { 3027 SDValue Op1 = N->getOperand(1); 3028 3029 // Kill degenerate vector shifts: 3030 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { 3031 if (CN->isNullValue()) { 3032 Result = Op0; 3033 } 3034 } 3035 break; 3036 } 3037 case SPUISD::PREFSLOT2VEC: { 3038 switch (Op0.getOpcode()) { 3039 default: 3040 break; 3041 case ISD::ANY_EXTEND: 3042 case ISD::ZERO_EXTEND: 3043 case ISD::SIGN_EXTEND: { 3044 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> 3045 // <arg> 3046 // but only if the SPUprefslot2vec and <arg> types match. 3047 SDValue Op00 = Op0.getOperand(0); 3048 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { 3049 SDValue Op000 = Op00.getOperand(0); 3050 if (Op000.getValueType() == NodeVT) { 3051 Result = Op000; 3052 } 3053 } 3054 break; 3055 } 3056 case SPUISD::VEC2PREFSLOT: { 3057 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> 3058 // <arg> 3059 Result = Op0.getOperand(0); 3060 break; 3061 } 3062 } 3063 break; 3064 } 3065 } 3066 3067 // Otherwise, return unchanged. 3068 #ifndef NDEBUG 3069 if (Result.getNode()) { 3070 DEBUG(errs() << "\nReplace.SPU: "); 3071 DEBUG(N->dump(&DAG)); 3072 DEBUG(errs() << "\nWith: "); 3073 DEBUG(Result.getNode()->dump(&DAG)); 3074 DEBUG(errs() << "\n"); 3075 } 3076 #endif 3077 3078 return Result; 3079 } 3080 3081 //===----------------------------------------------------------------------===// 3082 // Inline Assembly Support 3083 //===----------------------------------------------------------------------===// 3084 3085 /// getConstraintType - Given a constraint letter, return the type of 3086 /// constraint it is for this target. 3087 SPUTargetLowering::ConstraintType 3088 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { 3089 if (ConstraintLetter.size() == 1) { 3090 switch (ConstraintLetter[0]) { 3091 default: break; 3092 case 'b': 3093 case 'r': 3094 case 'f': 3095 case 'v': 3096 case 'y': 3097 return C_RegisterClass; 3098 } 3099 } 3100 return TargetLowering::getConstraintType(ConstraintLetter); 3101 } 3102 3103 /// Examine constraint type and operand type and determine a weight value. 3104 /// This object must already have been set up with the operand type 3105 /// and the current alternative constraint selected. 3106 TargetLowering::ConstraintWeight 3107 SPUTargetLowering::getSingleConstraintMatchWeight( 3108 AsmOperandInfo &info, const char *constraint) const { 3109 ConstraintWeight weight = CW_Invalid; 3110 Value *CallOperandVal = info.CallOperandVal; 3111 // If we don't have a value, we can't do a match, 3112 // but allow it at the lowest weight. 3113 if (CallOperandVal == NULL) 3114 return CW_Default; 3115 // Look at the constraint type. 3116 switch (*constraint) { 3117 default: 3118 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 3119 break; 3120 //FIXME: Seems like the supported constraint letters were just copied 3121 // from PPC, as the following doesn't correspond to the GCC docs. 3122 // I'm leaving it so until someone adds the corresponding lowering support. 3123 case 'b': 3124 case 'r': 3125 case 'f': 3126 case 'd': 3127 case 'v': 3128 case 'y': 3129 weight = CW_Register; 3130 break; 3131 } 3132 return weight; 3133 } 3134 3135 std::pair<unsigned, const TargetRegisterClass*> 3136 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 3137 EVT VT) const 3138 { 3139 if (Constraint.size() == 1) { 3140 // GCC RS6000 Constraint Letters 3141 switch (Constraint[0]) { 3142 case 'b': // R1-R31 3143 case 'r': // R0-R31 3144 if (VT == MVT::i64) 3145 return std::make_pair(0U, SPU::R64CRegisterClass); 3146 return std::make_pair(0U, SPU::R32CRegisterClass); 3147 case 'f': 3148 if (VT == MVT::f32) 3149 return std::make_pair(0U, SPU::R32FPRegisterClass); 3150 else if (VT == MVT::f64) 3151 return std::make_pair(0U, SPU::R64FPRegisterClass); 3152 break; 3153 case 'v': 3154 return std::make_pair(0U, SPU::GPRCRegisterClass); 3155 } 3156 } 3157 3158 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 3159 } 3160 3161 //! Compute used/known bits for a SPU operand 3162 void 3163 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 3164 const APInt &Mask, 3165 APInt &KnownZero, 3166 APInt &KnownOne, 3167 const SelectionDAG &DAG, 3168 unsigned Depth ) const { 3169 #if 0 3170 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; 3171 3172 switch (Op.getOpcode()) { 3173 default: 3174 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 3175 break; 3176 case CALL: 3177 case SHUFB: 3178 case SHUFFLE_MASK: 3179 case CNTB: 3180 case SPUISD::PREFSLOT2VEC: 3181 case SPUISD::LDRESULT: 3182 case SPUISD::VEC2PREFSLOT: 3183 case SPUISD::SHLQUAD_L_BITS: 3184 case SPUISD::SHLQUAD_L_BYTES: 3185 case SPUISD::VEC_ROTL: 3186 case SPUISD::VEC_ROTR: 3187 case SPUISD::ROTBYTES_LEFT: 3188 case SPUISD::SELECT_MASK: 3189 case SPUISD::SELB: 3190 } 3191 #endif 3192 } 3193 3194 unsigned 3195 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, 3196 unsigned Depth) const { 3197 switch (Op.getOpcode()) { 3198 default: 3199 return 1; 3200 3201 case ISD::SETCC: { 3202 EVT VT = Op.getValueType(); 3203 3204 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { 3205 VT = MVT::i32; 3206 } 3207 return VT.getSizeInBits(); 3208 } 3209 } 3210 } 3211 3212 // LowerAsmOperandForConstraint 3213 void 3214 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 3215 std::string &Constraint, 3216 std::vector<SDValue> &Ops, 3217 SelectionDAG &DAG) const { 3218 // Default, for the time being, to the base class handler 3219 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 3220 } 3221 3222 /// isLegalAddressImmediate - Return true if the integer value can be used 3223 /// as the offset of the target addressing mode. 3224 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, 3225 Type *Ty) const { 3226 // SPU's addresses are 256K: 3227 return (V > -(1 << 18) && V < (1 << 18) - 1); 3228 } 3229 3230 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 3231 return false; 3232 } 3233 3234 bool 3235 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 3236 // The SPU target isn't yet aware of offsets. 3237 return false; 3238 } 3239 3240 // can we compare to Imm without writing it into a register? 3241 bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 3242 //ceqi, cgti, etc. all take s10 operand 3243 return isInt<10>(Imm); 3244 } 3245 3246 bool 3247 SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, 3248 Type * ) const{ 3249 3250 // A-form: 18bit absolute address. 3251 if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0) 3252 return true; 3253 3254 // D-form: reg + 14bit offset 3255 if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs)) 3256 return true; 3257 3258 // X-form: reg+reg 3259 if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0) 3260 return true; 3261 3262 return false; 3263 } 3264