//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameLowering.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  // Byte offset of the preferred slot (counted from the MSB)
  int prefslotOffset(EVT VT) {
    int retval = 0;
    if (VT == MVT::i1) retval = 3;
    if (VT == MVT::i8) retval = 3;
    if (VT == MVT::i16) retval = 2;

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER,   MVT::Other, Expand);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does have i32 CTLZ support.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BITCAST, MVT::i32, Legal);
  setOperationAction(ISD::BITCAST, MVT::f32, Legal);
  setOperationAction(ISD::BITCAST, MVT::i64, Legal);
  setOperationAction(ISD::BITCAST, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
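  // (With Expand, the generic legalizer turns sign_extend_inreg from i1 into a
  //  shift-left / arithmetic-shift-right pair, e.g. shl/sra by 31 for an i32
  //  value.)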

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // Set operation actions to legal types only.
    if (!isTypeLegal(VT)) continue;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is likewise legal for the supported vector types.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Custom);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Expand all trunc stores
    for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
      MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
      setTruncStoreAction(VT, TargetVT, Expand);
    }

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::SHL, MVT::v2i64, Expand);

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
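  // All-ones booleans mesh with the bitwise (a&~c)|(b&c) select noted above:
  // a condition mask of ~0 selects the entire second operand, 0 the first.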

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setMinFunctionAlignment(3);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  MVT::SimpleValueType retval;

  switch(VT.getSimpleVT().SimpleTy){
  case MVT::i1:
  case MVT::i8:
    retval = MVT::i8; break;
  case MVT::i16:
    retval = MVT::i16; break;
  case MVT::i32:
  default:
    retval = MVT::i32;
  }
  return retval;
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

 \verbatim
 %1  v16i8,ch = load
 %2  v16i8,ch = rotate %1
 %3  v4f32,ch = bitconvert %2
 %4  f32 = vec2prefslot %3
 %5  f64 = fp_extend %4
 \endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  int pso = prefslotOffset(InVT);
  DebugLoc dl = Op.getDebugLoc();
  EVT vecVT = InVT.isVector()? InVT : EVT::getVectorVT(*DAG.getContext(), InVT,
                                                  (128 / InVT.getSizeInBits()));
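  // (A scalar load is viewed here as one element of a 128-bit vector, e.g. an
  //  i16 load is treated as a v8i16, since 128 / 16 = 8 elements.)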

  // Sanity check: we should only see unindexed loads here.
  assert( LN->getAddressingMode() == ISD::UNINDEXED
          && "we should get only UNINDEXED addresses");
  // Clean, aligned 128-bit loads can be selected as-is.
  if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
    return SDValue();

  // Get pointerinfos to the memory chunk(s) that contain the data to load
  uint64_t mpi_offset = LN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);

  SDValue result;
  SDValue basePtr = LN->getBasePtr();
  SDValue rotate;

  if ((alignment%16) == 0) {
    ConstantSDNode *CN;

    // Special cases for a known aligned load to simplify the base pointer
    // and the rotation amount:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();
      int64_t rotamt = int64_t((offset & 0xf) - pso);

      if (rotamt < 0)
        rotamt += 16;

      rotate = DAG.getConstant(rotamt, MVT::i16);

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
               || (basePtr.getOpcode() == SPUISD::IndirectAddr
                   && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                   && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
      // Plain aligned a-form address: rotate into preferred slot
      // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getConstant(rotamt, MVT::i16);
    } else {
      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(rotamt, PtrVT));
    }
  } else {
    // Unaligned load: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Offset the rotate amount by the basePtr and the preferred slot
    // byte offset
    rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                         basePtr,
                         DAG.getConstant(-pso, PtrVT));
  }

  // Do the load as an i128 to allow possible shifting
  SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
                            lowMemPtr,
                            LN->isVolatile(), LN->isNonTemporal(), 16);

  // When the size is not greater than alignment we get all data with just
  // one load
  if (alignment >= InVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
                         low.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BITCAST, dl, vecVT, result));
  }
  // When alignment is less than the size, we might need (known only at
  // run-time) two loads
  // TODO: if the memory address is composed only from constants, we have
  // extra knowledge, and might avoid the second load
  else {
    // storage position offset from lower 16 byte aligned memory chunk
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
    // Get a register full of ones. (This implementation is a workaround: LLVM
    // cannot handle 128 bit signed int constants.)
    SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
                               DAG.getNode(ISD::ADD, dl, PtrVT,
                                           basePtr,
                                           DAG.getConstant(16, PtrVT)),
                               highMemPtr,
                               LN->isVolatile(), LN->isNonTemporal(), 16);

    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            high.getValue(1));

    // Shift the (possible) high part right to compensate for the misalignment.
    // If there is no high part (i.e. the value is i64 and the offset is 4),
    // this shifts the high quad out of the way so it contributes nothing.
    high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
                       DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant( 16, MVT::i32),
                                   offset
                                   ));
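    // (Concretely, for an i64 loaded at offset 4 the value lies entirely in
    //  the low quad: high is shifted right by 16 - 4 = 12 bytes, so it cannot
    //  reach the preferred-slot bytes assembled by the OR below.)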

    // Shift the low part similarly
    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );

    // Merge the two parts
    result = DAG.getNode(ISD::BITCAST, dl, vecVT,
                         DAG.getNode(ISD::OR, dl, MVT::i128, low, high));

    if (!InVT.isVector()) {
      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
    }

  }
  // Handle extending loads by extending the scalar result:
  if (ExtType == ISD::SEXTLOAD) {
    result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::ZEXTLOAD) {
    result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::EXTLOAD) {
    unsigned NewOpc = ISD::ANY_EXTEND;

    if (OutVT.isFloatingPoint())
      NewOpc = ISD::FP_EXTEND;

    result = DAG.getNode(NewOpc, dl, OutVT, result);
  }

  SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
  SDValue retops[2] = {
    result,
    the_chain
  };

  result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                       retops, sizeof(retops) / sizeof(retops[0]));
  return result;
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();
  SDValue result;
  EVT vecVT = StVT.isVector()? StVT : EVT::getVectorVT(*DAG.getContext(), StVT,
                                                  (128 / StVT.getSizeInBits()));
  // Get pointerinfos to the memory chunk(s) that contain the data to store
  uint64_t mpi_offset = SN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);


  // Sanity check: we should only see unindexed stores here.
  assert( SN->getAddressingMode() == ISD::UNINDEXED
          && "we should get only UNINDEXED addresses");
  // Clean, aligned 128-bit stores can be selected as-is.
  if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
    return SDValue();

  SDValue alignLoadVec;
  SDValue basePtr = SN->getBasePtr();
  SDValue the_chain = SN->getChain();
  SDValue insertEltOffs;

  if ((alignment%16) == 0) {
    ConstantSDNode *CN;
    // Special cases for a known aligned load to simplify the base pointer
    // and insertion byte:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & 0xf), PtrVT));

      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else {
      // Otherwise, assume it's at byte 0 of basePtr
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }
  } else {
    // Unaligned store: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Insertion point is solely determined by basePtr's contents
    insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
  }

  // Load the lower part of the memory region to which we will store.
  SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
                            lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
                            16);
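  // This is a read-modify-write of the containing 16-byte line: the old
  // contents are loaded, the new element is merged in via a shuffle, and the
  // combined block is written back.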

  // If we don't need to store over the 16-byte boundary, one store suffices.
  if (alignment >= StVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(low);
    SDValue theValue = SN->getValue();

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                      insertEltOffs);
    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                      theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, low,
                         DAG.getNode(ISD::BITCAST, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          lowMemPtr,
                          LN->isVolatile(), LN->isNonTemporal(),
                          16);

  }
  // Do the store when it might cross the 16-byte memory access boundary.
  else {
    // TODO issue a warning if SN->isVolatile()== true? This is likely not
    // what the user wanted.

    // address offset from the nearest lower 16-byte aligned address
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 SN->getBasePtr(),
                                 DAG.getConstant(0xf, MVT::i32));
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                       DAG.getConstant( 16, MVT::i32),
                                       offset);
    // 16 - sizeof(Value)
    SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                  DAG.getConstant( 16, MVT::i32),
                                  DAG.getConstant( VT.getSizeInBits()/8,
                                                   MVT::i32));
    // get a register full of ones
    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    // Create the 128 bit masks that have ones where the data to store is
    // located.
    SDValue lowmask, himask;
    // If the value to store doesn't fill up an entire 128 bits, zero out the
    // last bits of the mask so that only the value we want to store is masked.
    // This is e.g. the case for a store i32 with align 2.
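    // (For a store i32, align 2: surplus = 16 - 4 = 12 bytes, so shifting the
    //  all-ones quad right and then left by 12 bytes leaves ones only in the
    //  four most significant bytes, i.e. over the i32's preferred slot.)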
    if (!VT.isVector()){
      Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
      lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
      lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                            surplus);
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
      Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);

    }
    else {
      lowmask = ones;
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
    }
    // This will be zero if no data goes to the high quad.
    himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                         offset_compl);
    lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
                          offset);

    // Load in the old data and zero out the parts that will be overwritten with
    // the new data to store.
    SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
                             DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                         DAG.getConstant( 16, PtrVT)),
                             highMemPtr,
                             SN->isVolatile(), SN->isNonTemporal(), 16);
    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            hi.getValue(1));

    low = DAG.getNode(ISD::AND, dl, MVT::i128,
                      DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
                      DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
    hi = DAG.getNode(ISD::AND, dl, MVT::i128,
                     DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
                     DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));

    // Shift the Value to store into place. rlow contains the parts that go to
    // the lower memory chunk, rhi has the parts that go to the upper one.
    SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
    rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
    SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
                              offset_compl);

    // Merge the old data and the new data and store the results.
    // Need to convert vectors here to integers, as 'OR'ing floats asserts.
    rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
    rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));

    low = DAG.getStore(the_chain, dl, rlow, basePtr,
                       lowMemPtr,
                       SN->isVolatile(), SN->isNonTemporal(), 16);
    hi  = DAG.getStore(the_chain, dl, rhi,
                       DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                   DAG.getConstant( 16, PtrVT)),
                       highMemPtr,
                       SN->isVolatile(), SN->isNonTemporal(), 16);
    result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
                         hi.getValue(0));
  }

  return result;
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
                       " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

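// LowerConstantPool, LowerJumpTable and LowerGlobalAddress above all follow
// the same pattern: without large-memory support the symbol becomes a single
// A-form absolute address, while with usingLargeMem() it is split into an
// (SPUhi, SPUlo) pair that is recombined through SPUISD::IndirectAddr.
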
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameLowering::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
    CCValAssign &VA = ArgLocs[ArgNo];

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    //        tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of the ArgRegs array (R3..R79 inclusive, i.e. 77 registers)
    unsigned NumArgRegs = 77;

    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();


  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // Now handle the return value(s)
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);


  // If the call has results, copy the values out of the ret val registers.
1450 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1451 CCValAssign VA = RVLocs[i]; 1452 1453 SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1454 InFlag); 1455 Chain = Val.getValue(1); 1456 InFlag = Val.getValue(2); 1457 InVals.push_back(Val); 1458 } 1459 1460 return Chain; 1461 } 1462 1463 SDValue 1464 SPUTargetLowering::LowerReturn(SDValue Chain, 1465 CallingConv::ID CallConv, bool isVarArg, 1466 const SmallVectorImpl<ISD::OutputArg> &Outs, 1467 const SmallVectorImpl<SDValue> &OutVals, 1468 DebugLoc dl, SelectionDAG &DAG) const { 1469 1470 SmallVector<CCValAssign, 16> RVLocs; 1471 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1472 getTargetMachine(), RVLocs, *DAG.getContext()); 1473 CCInfo.AnalyzeReturn(Outs, RetCC_SPU); 1474 1475 // If this is the first return lowered for this function, add the regs to the 1476 // liveout set for the function. 1477 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1478 for (unsigned i = 0; i != RVLocs.size(); ++i) 1479 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1480 } 1481 1482 SDValue Flag; 1483 1484 // Copy the result values into the output registers. 1485 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1486 CCValAssign &VA = RVLocs[i]; 1487 assert(VA.isRegLoc() && "Can only return in registers!"); 1488 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1489 OutVals[i], Flag); 1490 Flag = Chain.getValue(1); 1491 } 1492 1493 if (Flag.getNode()) 1494 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1495 else 1496 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); 1497 } 1498 1499 1500 //===----------------------------------------------------------------------===// 1501 // Vector related lowering: 1502 //===----------------------------------------------------------------------===// 1503 1504 static ConstantSDNode * 1505 getVecImm(SDNode *N) { 1506 SDValue OpVal(0, 0); 1507 1508 // Check to see if this buildvec has a single non-undef value in its elements. 
1509 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1510 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1511 if (OpVal.getNode() == 0) 1512 OpVal = N->getOperand(i); 1513 else if (OpVal != N->getOperand(i)) 1514 return 0; 1515 } 1516 1517 if (OpVal.getNode() != 0) { 1518 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1519 return CN; 1520 } 1521 } 1522 1523 return 0; 1524 } 1525 1526 /// get_vec_i18imm - Test if this vector is a vector filled with the same value 1527 /// and the value fits into an unsigned 18-bit constant, and if so, return the 1528 /// constant 1529 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, 1530 EVT ValueType) { 1531 if (ConstantSDNode *CN = getVecImm(N)) { 1532 uint64_t Value = CN->getZExtValue(); 1533 if (ValueType == MVT::i64) { 1534 uint64_t UValue = CN->getZExtValue(); 1535 uint32_t upper = uint32_t(UValue >> 32); 1536 uint32_t lower = uint32_t(UValue); 1537 if (upper != lower) 1538 return SDValue(); 1539 Value = Value >> 32; 1540 } 1541 if (Value <= 0x3ffff) 1542 return DAG.getTargetConstant(Value, ValueType); 1543 } 1544 1545 return SDValue(); 1546 } 1547 1548 /// get_vec_i16imm - Test if this vector is a vector filled with the same value 1549 /// and the value fits into a signed 16-bit constant, and if so, return the 1550 /// constant 1551 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, 1552 EVT ValueType) { 1553 if (ConstantSDNode *CN = getVecImm(N)) { 1554 int64_t Value = CN->getSExtValue(); 1555 if (ValueType == MVT::i64) { 1556 uint64_t UValue = CN->getZExtValue(); 1557 uint32_t upper = uint32_t(UValue >> 32); 1558 uint32_t lower = uint32_t(UValue); 1559 if (upper != lower) 1560 return SDValue(); 1561 Value = Value >> 32; 1562 } 1563 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) { 1564 return DAG.getTargetConstant(Value, ValueType); 1565 } 1566 } 1567 1568 return SDValue(); 1569 } 1570 1571 /// get_vec_i10imm - Test if this vector is a vector filled with the same value 1572 /// and the value fits into a signed 10-bit constant, and if so, return the 1573 /// constant 1574 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, 1575 EVT ValueType) { 1576 if (ConstantSDNode *CN = getVecImm(N)) { 1577 int64_t Value = CN->getSExtValue(); 1578 if (ValueType == MVT::i64) { 1579 uint64_t UValue = CN->getZExtValue(); 1580 uint32_t upper = uint32_t(UValue >> 32); 1581 uint32_t lower = uint32_t(UValue); 1582 if (upper != lower) 1583 return SDValue(); 1584 Value = Value >> 32; 1585 } 1586 if (isInt<10>(Value)) 1587 return DAG.getTargetConstant(Value, ValueType); 1588 } 1589 1590 return SDValue(); 1591 } 1592 1593 /// get_vec_i8imm - Test if this vector is a vector filled with the same value 1594 /// and the value fits into a signed 8-bit constant, and if so, return the 1595 /// constant. 1596 /// 1597 /// @note: The incoming vector is v16i8 because that's the only way we can load 1598 /// constant vectors. Thus, we test to see if the upper and lower bytes are the 1599 /// same value. 
1600 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, 1601 EVT ValueType) { 1602 if (ConstantSDNode *CN = getVecImm(N)) { 1603 int Value = (int) CN->getZExtValue(); 1604 if (ValueType == MVT::i16 1605 && Value <= 0xffff /* truncated from uint64_t */ 1606 && ((short) Value >> 8) == ((short) Value & 0xff)) 1607 return DAG.getTargetConstant(Value & 0xff, ValueType); 1608 else if (ValueType == MVT::i8 1609 && (Value & 0xff) == Value) 1610 return DAG.getTargetConstant(Value, ValueType); 1611 } 1612 1613 return SDValue(); 1614 } 1615 1616 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value 1617 /// and the value fits into a signed 16-bit constant, and if so, return the 1618 /// constant 1619 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, 1620 EVT ValueType) { 1621 if (ConstantSDNode *CN = getVecImm(N)) { 1622 uint64_t Value = CN->getZExtValue(); 1623 if ((ValueType == MVT::i32 1624 && ((unsigned) Value & 0xffff0000) == (unsigned) Value) 1625 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) 1626 return DAG.getTargetConstant(Value >> 16, ValueType); 1627 } 1628 1629 return SDValue(); 1630 } 1631 1632 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors 1633 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { 1634 if (ConstantSDNode *CN = getVecImm(N)) { 1635 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); 1636 } 1637 1638 return SDValue(); 1639 } 1640 1641 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors 1642 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { 1643 if (ConstantSDNode *CN = getVecImm(N)) { 1644 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); 1645 } 1646 1647 return SDValue(); 1648 } 1649 1650 //! Lower a BUILD_VECTOR instruction creatively: 1651 static SDValue 1652 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 1653 EVT VT = Op.getValueType(); 1654 EVT EltVT = VT.getVectorElementType(); 1655 DebugLoc dl = Op.getDebugLoc(); 1656 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 1657 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); 1658 unsigned minSplatBits = EltVT.getSizeInBits(); 1659 1660 if (minSplatBits < 16) 1661 minSplatBits = 16; 1662 1663 APInt APSplatBits, APSplatUndef; 1664 unsigned SplatBitSize; 1665 bool HasAnyUndefs; 1666 1667 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 1668 HasAnyUndefs, minSplatBits) 1669 || minSplatBits < SplatBitSize) 1670 return SDValue(); // Wasn't a constant vector or splat exceeded min 1671 1672 uint64_t SplatBits = APSplatBits.getZExtValue(); 1673 1674 switch (VT.getSimpleVT().SimpleTy) { 1675 default: 1676 report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + 1677 Twine(VT.getEVTString())); 1678 /*NOTREACHED*/ 1679 case MVT::v4f32: { 1680 uint32_t Value32 = uint32_t(SplatBits); 1681 assert(SplatBitSize == 32 1682 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); 1683 // NOTE: pretend the constant is an integer. LLVM won't load FP constants 1684 SDValue T = DAG.getConstant(Value32, MVT::i32); 1685 return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, 1686 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); 1687 break; 1688 } 1689 case MVT::v2f64: { 1690 uint64_t f64val = uint64_t(SplatBits); 1691 assert(SplatBitSize == 64 1692 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); 1693 // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants 1694 SDValue T = DAG.getConstant(f64val, MVT::i64); 1695 return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, 1696 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); 1697 break; 1698 } 1699 case MVT::v16i8: { 1700 // 8-bit constants have to be expanded to 16-bits 1701 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; 1702 SmallVector<SDValue, 8> Ops; 1703 1704 Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); 1705 return DAG.getNode(ISD::BITCAST, dl, VT, 1706 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); 1707 } 1708 case MVT::v8i16: { 1709 unsigned short Value16 = SplatBits; 1710 SDValue T = DAG.getConstant(Value16, EltVT); 1711 SmallVector<SDValue, 8> Ops; 1712 1713 Ops.assign(8, T); 1714 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); 1715 } 1716 case MVT::v4i32: { 1717 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); 1718 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); 1719 } 1720 case MVT::v2i64: { 1721 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); 1722 } 1723 } 1724 1725 return SDValue(); 1726 } 1727 1728 /*! 1729 */ 1730 SDValue 1731 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, 1732 DebugLoc dl) { 1733 uint32_t upper = uint32_t(SplatVal >> 32); 1734 uint32_t lower = uint32_t(SplatVal); 1735 1736 if (upper == lower) { 1737 // Magic constant that can be matched by IL, ILA, et. al. 1738 SDValue Val = DAG.getTargetConstant(upper, MVT::i32); 1739 return DAG.getNode(ISD::BITCAST, dl, OpVT, 1740 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1741 Val, Val, Val, Val)); 1742 } else { 1743 bool upper_special, lower_special; 1744 1745 // NOTE: This code creates common-case shuffle masks that can be easily 1746 // detected as common expressions. It is not attempting to create highly 1747 // specialized masks to replace any and all 0's, 0xff's and 0x80's. 
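    // (In the SHUFB control words built below, a mask byte of 0x80 selects a
    // zero byte, 0xC0 selects an all-ones byte, and 0xE0 selects the constant
    // 0x80 -- the shufb instruction's special-byte encodings.)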
1748 1749 // Detect if the upper or lower half is a special shuffle mask pattern: 1750 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); 1751 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); 1752 1753 // Both upper and lower are special, lower to a constant pool load: 1754 if (lower_special && upper_special) { 1755 SDValue UpperVal = DAG.getConstant(upper, MVT::i32); 1756 SDValue LowerVal = DAG.getConstant(lower, MVT::i32); 1757 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1758 UpperVal, LowerVal, UpperVal, LowerVal); 1759 return DAG.getNode(ISD::BITCAST, dl, OpVT, BV); 1760 } 1761 1762 SDValue LO32; 1763 SDValue HI32; 1764 SmallVector<SDValue, 16> ShufBytes; 1765 SDValue Result; 1766 1767 // Create lower vector if not a special pattern 1768 if (!lower_special) { 1769 SDValue LO32C = DAG.getConstant(lower, MVT::i32); 1770 LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT, 1771 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1772 LO32C, LO32C, LO32C, LO32C)); 1773 } 1774 1775 // Create upper vector if not a special pattern 1776 if (!upper_special) { 1777 SDValue HI32C = DAG.getConstant(upper, MVT::i32); 1778 HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT, 1779 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1780 HI32C, HI32C, HI32C, HI32C)); 1781 } 1782 1783 // If either upper or lower are special, then the two input operands are 1784 // the same (basically, one of them is a "don't care") 1785 if (lower_special) 1786 LO32 = HI32; 1787 if (upper_special) 1788 HI32 = LO32; 1789 1790 for (int i = 0; i < 4; ++i) { 1791 uint64_t val = 0; 1792 for (int j = 0; j < 4; ++j) { 1793 SDValue V; 1794 bool process_upper, process_lower; 1795 val <<= 8; 1796 process_upper = (upper_special && (i & 1) == 0); 1797 process_lower = (lower_special && (i & 1) == 1); 1798 1799 if (process_upper || process_lower) { 1800 if ((process_upper && upper == 0) 1801 || (process_lower && lower == 0)) 1802 val |= 0x80; 1803 else if ((process_upper && upper == 0xffffffff) 1804 || (process_lower && lower == 0xffffffff)) 1805 val |= 0xc0; 1806 else if ((process_upper && upper == 0x80000000) 1807 || (process_lower && lower == 0x80000000)) 1808 val |= (j == 0 ? 0xe0 : 0x80); 1809 } else 1810 val |= i * 4 + j + ((i & 1) * 16); 1811 } 1812 1813 ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); 1814 } 1815 1816 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, 1817 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1818 &ShufBytes[0], ShufBytes.size())); 1819 } 1820 } 1821 1822 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on 1823 /// which the Cell can operate. The code inspects V3 to ascertain whether the 1824 /// permutation vector, V3, is monotonically increasing with one "exception" 1825 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a 1826 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. 1827 /// In either case, the net result is going to eventually invoke SHUFB to 1828 /// permute/shuffle the bytes from V1 and V2. 1829 /// \note 1830 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate 1831 /// control word for byte/halfword/word insertion. This takes care of a single 1832 /// element move from V2 into V1. 1833 /// \note 1834 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions. 
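/// \note
/// For example, the v4i32 mask (0, 5, 2, 3) is monotonic with a single
/// element (slot 1) taken from V2 and maps to the SHUFFLE_MASK + SHUFB path,
/// while the mask (1, 2, 3, 0) is recognized as a rotation and lowered to a
/// single ROTBYTES_LEFT of one element.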
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element, and the source
  // slot of the element to move must be the same as the destination.
  EVT VecVT = V1.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2EltOffset = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  bool monotonic = true;
  bool rotate = true;
  int rotamt = 0;
  EVT maskVT;             // which of the c?d instructions to use

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
    maskVT = MVT::v16i8;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
    maskVT = MVT::v8i16;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
    maskVT = MVT::v4i32;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
    maskVT = MVT::v2i64;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        // TODO: optimize for the monotonic case when several consecutive
        // elements are taken from V2. Do we ever get such a case?
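        // V2EltOffset records the byte offset of the single element taken
        // from V2; it is later fed to SHUFFLE_MASK so the generated c?d
        // control word inserts V2's element at that slot.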
1885 if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) 1886 V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); 1887 else 1888 monotonic = false; 1889 ++EltsFromV2; 1890 } else if (CurrElt != SrcElt) { 1891 monotonic = false; 1892 } 1893 1894 ++CurrElt; 1895 } 1896 1897 if (rotate) { 1898 if (PrevElt > 0 && SrcElt < MaxElts) { 1899 if ((PrevElt == SrcElt - 1) 1900 || (PrevElt == MaxElts - 1 && SrcElt == 0)) { 1901 PrevElt = SrcElt; 1902 } else { 1903 rotate = false; 1904 } 1905 } else if (i == 0 || (PrevElt==0 && SrcElt==1)) { 1906 // First time or after a "wrap around" 1907 rotamt = SrcElt-i; 1908 PrevElt = SrcElt; 1909 } else { 1910 // This isn't a rotation, takes elements from vector 2 1911 rotate = false; 1912 } 1913 } 1914 } 1915 1916 if (EltsFromV2 == 1 && monotonic) { 1917 // Compute mask and shuffle 1918 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1919 1920 // As SHUFFLE_MASK becomes a c?d instruction, feed it an address 1921 // R1 ($sp) is used here only as it is guaranteed to have last bits zero 1922 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 1923 DAG.getRegister(SPU::R1, PtrVT), 1924 DAG.getConstant(V2EltOffset, MVT::i32)); 1925 SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, 1926 maskVT, Pointer); 1927 1928 // Use shuffle mask in SHUFB synthetic instruction: 1929 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, 1930 ShufMaskOp); 1931 } else if (rotate) { 1932 if (rotamt < 0) 1933 rotamt +=MaxElts; 1934 rotamt *= EltVT.getSizeInBits()/8; 1935 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), 1936 V1, DAG.getConstant(rotamt, MVT::i16)); 1937 } else { 1938 // Convert the SHUFFLE_VECTOR mask's input element units to the 1939 // actual bytes. 1940 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 1941 1942 SmallVector<SDValue, 16> ResultMask; 1943 for (unsigned i = 0, e = MaxElts; i != e; ++i) { 1944 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); 1945 1946 for (unsigned j = 0; j < BytesPerElement; ++j) 1947 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); 1948 } 1949 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 1950 &ResultMask[0], ResultMask.size()); 1951 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); 1952 } 1953 } 1954 1955 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { 1956 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar 1957 DebugLoc dl = Op.getDebugLoc(); 1958 1959 if (Op0.getNode()->getOpcode() == ISD::Constant) { 1960 // For a constant, build the appropriate constant vector, which will 1961 // eventually simplify to a vector register load. 
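    // (e.g. a SCALAR_TO_VECTOR of the constant 3 with result type v4i32
    // becomes the BUILD_VECTOR <3, 3, 3, 3>, which later folds into a single
    // vector constant load.)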
1962 1963 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode()); 1964 SmallVector<SDValue, 16> ConstVecValues; 1965 EVT VT; 1966 size_t n_copies; 1967 1968 // Create a constant vector: 1969 switch (Op.getValueType().getSimpleVT().SimpleTy) { 1970 default: llvm_unreachable("Unexpected constant value type in " 1971 "LowerSCALAR_TO_VECTOR"); 1972 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; 1973 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; 1974 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; 1975 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; 1976 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; 1977 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; 1978 } 1979 1980 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); 1981 for (size_t j = 0; j < n_copies; ++j) 1982 ConstVecValues.push_back(CValue); 1983 1984 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), 1985 &ConstVecValues[0], ConstVecValues.size()); 1986 } else { 1987 // Otherwise, copy the value from one register to another: 1988 switch (Op0.getValueType().getSimpleVT().SimpleTy) { 1989 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); 1990 case MVT::i8: 1991 case MVT::i16: 1992 case MVT::i32: 1993 case MVT::i64: 1994 case MVT::f32: 1995 case MVT::f64: 1996 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); 1997 } 1998 } 1999 2000 return SDValue(); 2001 } 2002 2003 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2004 EVT VT = Op.getValueType(); 2005 SDValue N = Op.getOperand(0); 2006 SDValue Elt = Op.getOperand(1); 2007 DebugLoc dl = Op.getDebugLoc(); 2008 SDValue retval; 2009 2010 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2011 // Constant argument: 2012 int EltNo = (int) C->getZExtValue(); 2013 2014 // sanity checks: 2015 if (VT == MVT::i8 && EltNo >= 16) 2016 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); 2017 else if (VT == MVT::i16 && EltNo >= 8) 2018 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); 2019 else if (VT == MVT::i32 && EltNo >= 4) 2020 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); 2021 else if (VT == MVT::i64 && EltNo >= 2) 2022 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); 2023 2024 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { 2025 // i32 and i64: Element 0 is the preferred slot 2026 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); 2027 } 2028 2029 // Need to generate shuffle mask and extract: 2030 int prefslot_begin = -1, prefslot_end = -1; 2031 int elt_byte = EltNo * VT.getSizeInBits() / 8; 2032 2033 switch (VT.getSimpleVT().SimpleTy) { 2034 default: 2035 assert(false && "Invalid value type!"); 2036 case MVT::i8: { 2037 prefslot_begin = prefslot_end = 3; 2038 break; 2039 } 2040 case MVT::i16: { 2041 prefslot_begin = 2; prefslot_end = 3; 2042 break; 2043 } 2044 case MVT::i32: 2045 case MVT::f32: { 2046 prefslot_begin = 0; prefslot_end = 3; 2047 break; 2048 } 2049 case MVT::i64: 2050 case MVT::f64: { 2051 prefslot_begin = 0; prefslot_end = 7; 2052 break; 2053 } 2054 } 2055 2056 assert(prefslot_begin != -1 && prefslot_end != -1 && 2057 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); 2058 2059 unsigned int ShufBytes[16] = { 2060 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 2061 }; 2062 for (int i = 0; i < 16; ++i) { 2063 // zero fill uppper part of preferred slot, don't care about the 2064 // other slots: 2065 unsigned int mask_val; 2066 if (i <= 
prefslot_end) { 2067 mask_val = 2068 ((i < prefslot_begin) 2069 ? 0x80 2070 : elt_byte + (i - prefslot_begin)); 2071 2072 ShufBytes[i] = mask_val; 2073 } else 2074 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; 2075 } 2076 2077 SDValue ShufMask[4]; 2078 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { 2079 unsigned bidx = i * 4; 2080 unsigned int bits = ((ShufBytes[bidx] << 24) | 2081 (ShufBytes[bidx+1] << 16) | 2082 (ShufBytes[bidx+2] << 8) | 2083 ShufBytes[bidx+3]); 2084 ShufMask[i] = DAG.getConstant(bits, MVT::i32); 2085 } 2086 2087 SDValue ShufMaskVec = 2088 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2089 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); 2090 2091 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 2092 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), 2093 N, N, ShufMaskVec)); 2094 } else { 2095 // Variable index: Rotate the requested element into slot 0, then replicate 2096 // slot 0 across the vector 2097 EVT VecVT = N.getValueType(); 2098 if (!VecVT.isSimple() || !VecVT.isVector()) { 2099 report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" 2100 "vector type!"); 2101 } 2102 2103 // Make life easier by making sure the index is zero-extended to i32 2104 if (Elt.getValueType() != MVT::i32) 2105 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); 2106 2107 // Scale the index to a bit/byte shift quantity 2108 APInt scaleFactor = 2109 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); 2110 unsigned scaleShift = scaleFactor.logBase2(); 2111 SDValue vecShift; 2112 2113 if (scaleShift > 0) { 2114 // Scale the shift factor: 2115 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, 2116 DAG.getConstant(scaleShift, MVT::i32)); 2117 } 2118 2119 vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt); 2120 2121 // Replicate the bytes starting at byte 0 across the entire vector (for 2122 // consistency with the notion of a unified register set) 2123 SDValue replicate; 2124 2125 switch (VT.getSimpleVT().SimpleTy) { 2126 default: 2127 report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" 2128 "type"); 2129 /*NOTREACHED*/ 2130 case MVT::i8: { 2131 SDValue factor = DAG.getConstant(0x00000000, MVT::i32); 2132 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2133 factor, factor, factor, factor); 2134 break; 2135 } 2136 case MVT::i16: { 2137 SDValue factor = DAG.getConstant(0x00010001, MVT::i32); 2138 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2139 factor, factor, factor, factor); 2140 break; 2141 } 2142 case MVT::i32: 2143 case MVT::f32: { 2144 SDValue factor = DAG.getConstant(0x00010203, MVT::i32); 2145 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2146 factor, factor, factor, factor); 2147 break; 2148 } 2149 case MVT::i64: 2150 case MVT::f64: { 2151 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); 2152 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); 2153 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2154 loFactor, hiFactor, loFactor, hiFactor); 2155 break; 2156 } 2157 } 2158 2159 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 2160 DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2161 vecShift, vecShift, replicate)); 2162 } 2163 2164 return retval; 2165 } 2166 2167 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2168 SDValue VecOp = Op.getOperand(0); 2169 SDValue ValOp = Op.getOperand(1); 2170 SDValue IdxOp = Op.getOperand(2); 2171 DebugLoc dl = Op.getDebugLoc(); 2172 EVT VT = Op.getValueType(); 2173 EVT 
eltVT = ValOp.getValueType(); 2174 2175 // use 0 when the lane to insert to is 'undef' 2176 int64_t Offset=0; 2177 if (IdxOp.getOpcode() != ISD::UNDEF) { 2178 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); 2179 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); 2180 Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; 2181 } 2182 2183 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2184 // Use $sp ($1) because it's always 16-byte aligned and it's available: 2185 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 2186 DAG.getRegister(SPU::R1, PtrVT), 2187 DAG.getConstant(Offset, PtrVT)); 2188 // widen the mask when dealing with half vectors 2189 EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), 2190 128/ VT.getVectorElementType().getSizeInBits()); 2191 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); 2192 2193 SDValue result = 2194 DAG.getNode(SPUISD::SHUFB, dl, VT, 2195 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), 2196 VecOp, 2197 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask)); 2198 2199 return result; 2200 } 2201 2202 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, 2203 const TargetLowering &TLI) 2204 { 2205 SDValue N0 = Op.getOperand(0); // Everything has at least one operand 2206 DebugLoc dl = Op.getDebugLoc(); 2207 EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType()); 2208 2209 assert(Op.getValueType() == MVT::i8); 2210 switch (Opc) { 2211 default: 2212 llvm_unreachable("Unhandled i8 math operator"); 2213 /*NOTREACHED*/ 2214 break; 2215 case ISD::ADD: { 2216 // 8-bit addition: Promote the arguments up to 16-bits and truncate 2217 // the result: 2218 SDValue N1 = Op.getOperand(1); 2219 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2220 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2221 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2222 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2223 2224 } 2225 2226 case ISD::SUB: { 2227 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate 2228 // the result: 2229 SDValue N1 = Op.getOperand(1); 2230 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2231 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2232 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2233 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2234 } 2235 case ISD::ROTR: 2236 case ISD::ROTL: { 2237 SDValue N1 = Op.getOperand(1); 2238 EVT N1VT = N1.getValueType(); 2239 2240 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2241 if (!N1VT.bitsEq(ShiftVT)) { 2242 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) 2243 ? 
ISD::ZERO_EXTEND 2244 : ISD::TRUNCATE; 2245 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2246 } 2247 2248 // Replicate lower 8-bits into upper 8: 2249 SDValue ExpandArg = 2250 DAG.getNode(ISD::OR, dl, MVT::i16, N0, 2251 DAG.getNode(ISD::SHL, dl, MVT::i16, 2252 N0, DAG.getConstant(8, MVT::i32))); 2253 2254 // Truncate back down to i8 2255 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2256 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); 2257 } 2258 case ISD::SRL: 2259 case ISD::SHL: { 2260 SDValue N1 = Op.getOperand(1); 2261 EVT N1VT = N1.getValueType(); 2262 2263 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2264 if (!N1VT.bitsEq(ShiftVT)) { 2265 unsigned N1Opc = ISD::ZERO_EXTEND; 2266 2267 if (N1.getValueType().bitsGT(ShiftVT)) 2268 N1Opc = ISD::TRUNCATE; 2269 2270 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2271 } 2272 2273 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2274 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2275 } 2276 case ISD::SRA: { 2277 SDValue N1 = Op.getOperand(1); 2278 EVT N1VT = N1.getValueType(); 2279 2280 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2281 if (!N1VT.bitsEq(ShiftVT)) { 2282 unsigned N1Opc = ISD::SIGN_EXTEND; 2283 2284 if (N1VT.bitsGT(ShiftVT)) 2285 N1Opc = ISD::TRUNCATE; 2286 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2287 } 2288 2289 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2290 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2291 } 2292 case ISD::MUL: { 2293 SDValue N1 = Op.getOperand(1); 2294 2295 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2296 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2297 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2298 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2299 break; 2300 } 2301 } 2302 2303 return SDValue(); 2304 } 2305 2306 //! Lower byte immediate operations for v16i8 vectors: 2307 static SDValue 2308 LowerByteImmed(SDValue Op, SelectionDAG &DAG) { 2309 SDValue ConstVec; 2310 SDValue Arg; 2311 EVT VT = Op.getValueType(); 2312 DebugLoc dl = Op.getDebugLoc(); 2313 2314 ConstVec = Op.getOperand(0); 2315 Arg = Op.getOperand(1); 2316 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { 2317 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { 2318 ConstVec = ConstVec.getOperand(0); 2319 } else { 2320 ConstVec = Op.getOperand(1); 2321 Arg = Op.getOperand(0); 2322 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { 2323 ConstVec = ConstVec.getOperand(0); 2324 } 2325 } 2326 } 2327 2328 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { 2329 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode()); 2330 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); 2331 2332 APInt APSplatBits, APSplatUndef; 2333 unsigned SplatBitSize; 2334 bool HasAnyUndefs; 2335 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); 2336 2337 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 2338 HasAnyUndefs, minSplatBits) 2339 && minSplatBits <= SplatBitSize) { 2340 uint64_t SplatBits = APSplatBits.getZExtValue(); 2341 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); 2342 2343 SmallVector<SDValue, 16> tcVec; 2344 tcVec.assign(16, tc); 2345 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, 2346 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); 2347 } 2348 } 2349 2350 // These operations (AND, OR, XOR) are legal, they just couldn't be custom 2351 // lowered. Return the operation, rather than a null SDValue. 2352 return Op; 2353 } 2354 2355 //! Custom lowering for CTPOP (count population) 2356 /*! 
2357 Custom lowering code that counts the number ones in the input 2358 operand. SPU has such an instruction, but it counts the number of 2359 ones per byte, which then have to be accumulated. 2360 */ 2361 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { 2362 EVT VT = Op.getValueType(); 2363 EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 2364 VT, (128 / VT.getSizeInBits())); 2365 DebugLoc dl = Op.getDebugLoc(); 2366 2367 switch (VT.getSimpleVT().SimpleTy) { 2368 default: 2369 assert(false && "Invalid value type!"); 2370 case MVT::i8: { 2371 SDValue N = Op.getOperand(0); 2372 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2373 2374 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2375 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2376 2377 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); 2378 } 2379 2380 case MVT::i16: { 2381 MachineFunction &MF = DAG.getMachineFunction(); 2382 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2383 2384 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); 2385 2386 SDValue N = Op.getOperand(0); 2387 SDValue Elt0 = DAG.getConstant(0, MVT::i16); 2388 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); 2389 SDValue Shift1 = DAG.getConstant(8, MVT::i32); 2390 2391 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2392 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2393 2394 // CNTB_result becomes the chain to which all of the virtual registers 2395 // CNTB_reg, SUM1_reg become associated: 2396 SDValue CNTB_result = 2397 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); 2398 2399 SDValue CNTB_rescopy = 2400 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2401 2402 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); 2403 2404 return DAG.getNode(ISD::AND, dl, MVT::i16, 2405 DAG.getNode(ISD::ADD, dl, MVT::i16, 2406 DAG.getNode(ISD::SRL, dl, MVT::i16, 2407 Tmp1, Shift1), 2408 Tmp1), 2409 Mask0); 2410 } 2411 2412 case MVT::i32: { 2413 MachineFunction &MF = DAG.getMachineFunction(); 2414 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2415 2416 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2417 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2418 2419 SDValue N = Op.getOperand(0); 2420 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2421 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); 2422 SDValue Shift1 = DAG.getConstant(16, MVT::i32); 2423 SDValue Shift2 = DAG.getConstant(8, MVT::i32); 2424 2425 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2426 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2427 2428 // CNTB_result becomes the chain to which all of the virtual registers 2429 // CNTB_reg, SUM1_reg become associated: 2430 SDValue CNTB_result = 2431 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); 2432 2433 SDValue CNTB_rescopy = 2434 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2435 2436 SDValue Comp1 = 2437 DAG.getNode(ISD::SRL, dl, MVT::i32, 2438 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), 2439 Shift1); 2440 2441 SDValue Sum1 = 2442 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, 2443 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); 2444 2445 SDValue Sum1_rescopy = 2446 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); 2447 2448 SDValue Comp2 = 2449 DAG.getNode(ISD::SRL, dl, MVT::i32, 2450 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), 2451 Shift2); 2452 SDValue Sum2 
= DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
              DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}

//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
  All conversions to i64 are expanded to a libcall.
 */
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
            (Op.getOpcode() == ISD::FP_TO_SINT)
             ? RTLIB::getFPTOSINT(Op0VT, OpVT)
             : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
  All conversions from i64 are expanded to a libcall.
 */
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
            (Op.getOpcode() == ISD::SINT_TO_FP)
             ? RTLIB::getSINTTOFP(Op0VT, OpVT)
             : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SETCC
/*!
2520 This handles MVT::f64 (double floating point) condition lowering 2521 */ 2522 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, 2523 const TargetLowering &TLI) { 2524 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); 2525 DebugLoc dl = Op.getDebugLoc(); 2526 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); 2527 2528 SDValue lhs = Op.getOperand(0); 2529 SDValue rhs = Op.getOperand(1); 2530 EVT lhsVT = lhs.getValueType(); 2531 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); 2532 2533 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); 2534 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2535 EVT IntVT(MVT::i64); 2536 2537 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently 2538 // selected to a NOP: 2539 SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs); 2540 SDValue lhsHi32 = 2541 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2542 DAG.getNode(ISD::SRL, dl, IntVT, 2543 i64lhs, DAG.getConstant(32, MVT::i32))); 2544 SDValue lhsHi32abs = 2545 DAG.getNode(ISD::AND, dl, MVT::i32, 2546 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); 2547 SDValue lhsLo32 = 2548 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); 2549 2550 // SETO and SETUO only use the lhs operand: 2551 if (CC->get() == ISD::SETO) { 2552 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of 2553 // SETUO 2554 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2555 return DAG.getNode(ISD::XOR, dl, ccResultVT, 2556 DAG.getSetCC(dl, ccResultVT, 2557 lhs, DAG.getConstantFP(0.0, lhsVT), 2558 ISD::SETUO), 2559 DAG.getConstant(ccResultAllOnes, ccResultVT)); 2560 } else if (CC->get() == ISD::SETUO) { 2561 // Evaluates to true if Op0 is [SQ]NaN 2562 return DAG.getNode(ISD::AND, dl, ccResultVT, 2563 DAG.getSetCC(dl, ccResultVT, 2564 lhsHi32abs, 2565 DAG.getConstant(0x7ff00000, MVT::i32), 2566 ISD::SETGE), 2567 DAG.getSetCC(dl, ccResultVT, 2568 lhsLo32, 2569 DAG.getConstant(0, MVT::i32), 2570 ISD::SETGT)); 2571 } 2572 2573 SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs); 2574 SDValue rhsHi32 = 2575 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2576 DAG.getNode(ISD::SRL, dl, IntVT, 2577 i64rhs, DAG.getConstant(32, MVT::i32))); 2578 2579 // If a value is negative, subtract from the sign magnitude constant: 2580 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); 2581 2582 // Convert the sign-magnitude representation into 2's complement: 2583 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2584 lhsHi32, DAG.getConstant(31, MVT::i32)); 2585 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); 2586 SDValue lhsSelect = 2587 DAG.getNode(ISD::SELECT, dl, IntVT, 2588 lhsSelectMask, lhsSignMag2TC, i64lhs); 2589 2590 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2591 rhsHi32, DAG.getConstant(31, MVT::i32)); 2592 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); 2593 SDValue rhsSelect = 2594 DAG.getNode(ISD::SELECT, dl, IntVT, 2595 rhsSelectMask, rhsSignMag2TC, i64rhs); 2596 2597 unsigned compareOp; 2598 2599 switch (CC->get()) { 2600 case ISD::SETOEQ: 2601 case ISD::SETUEQ: 2602 compareOp = ISD::SETEQ; break; 2603 case ISD::SETOGT: 2604 case ISD::SETUGT: 2605 compareOp = ISD::SETGT; break; 2606 case ISD::SETOGE: 2607 case ISD::SETUGE: 2608 compareOp = ISD::SETGE; break; 2609 case ISD::SETOLT: 2610 case ISD::SETULT: 2611 compareOp = ISD::SETLT; break; 2612 case ISD::SETOLE: 
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETUNE:
  case ISD::SETONE:
    compareOp = ISD::SETNE; break;
  default:
    report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
  }

  SDValue result =
          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                       (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}

//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */

static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  EVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, and bits from $rB
  // where bits in $mask are 1. The SETCC result has 1s where the condition
  // was true and 0s where it was false, so falseval is passed as $rA and
  // trueval as $rB; i.e., the arguments to SELB appear reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}

//!
Custom lower ISD::TRUNCATE 2682 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) 2683 { 2684 // Type to truncate to 2685 EVT VT = Op.getValueType(); 2686 MVT simpleVT = VT.getSimpleVT(); 2687 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), 2688 VT, (128 / VT.getSizeInBits())); 2689 DebugLoc dl = Op.getDebugLoc(); 2690 2691 // Type to truncate from 2692 SDValue Op0 = Op.getOperand(0); 2693 EVT Op0VT = Op0.getValueType(); 2694 2695 if (Op0VT == MVT::i128 && simpleVT == MVT::i64) { 2696 // Create shuffle mask, least significant doubleword of quadword 2697 unsigned maskHigh = 0x08090a0b; 2698 unsigned maskLow = 0x0c0d0e0f; 2699 // Use a shuffle to perform the truncation 2700 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2701 DAG.getConstant(maskHigh, MVT::i32), 2702 DAG.getConstant(maskLow, MVT::i32), 2703 DAG.getConstant(maskHigh, MVT::i32), 2704 DAG.getConstant(maskLow, MVT::i32)); 2705 2706 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2707 Op0, Op0, shufMask); 2708 2709 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); 2710 } 2711 2712 return SDValue(); // Leave the truncate unmolested 2713 } 2714 2715 /*! 2716 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic 2717 * algorithm is to duplicate the sign bit using rotmai to generate at 2718 * least one byte full of sign bits. Then propagate the "sign-byte" into 2719 * the leftmost words and the i64/i32 into the rightmost words using shufb. 2720 * 2721 * @param Op The sext operand 2722 * @param DAG The current DAG 2723 * @return The SDValue with the entire instruction sequence 2724 */ 2725 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) 2726 { 2727 DebugLoc dl = Op.getDebugLoc(); 2728 2729 // Type to extend to 2730 MVT OpVT = Op.getValueType().getSimpleVT(); 2731 2732 // Type to extend from 2733 SDValue Op0 = Op.getOperand(0); 2734 MVT Op0VT = Op0.getValueType().getSimpleVT(); 2735 2736 // extend i8 & i16 via i32 2737 if (Op0VT == MVT::i8 || Op0VT == MVT::i16) { 2738 Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0); 2739 Op0VT = MVT::i32; 2740 } 2741 2742 // The type to extend to needs to be a i128 and 2743 // the type to extend from needs to be i64 or i32. 2744 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && 2745 "LowerSIGN_EXTEND: input and/or output operand have wrong size"); 2746 (void)OpVT; 2747 2748 // Create shuffle mask 2749 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 2750 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 2751 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 2752 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2753 DAG.getConstant(mask1, MVT::i32), 2754 DAG.getConstant(mask1, MVT::i32), 2755 DAG.getConstant(mask2, MVT::i32), 2756 DAG.getConstant(mask3, MVT::i32)); 2757 2758 // Word wise arithmetic right shift to generate at least one byte 2759 // that contains sign bits. 2760 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; 2761 SDValue sraVal = DAG.getNode(ISD::SRA, 2762 dl, 2763 mvt, 2764 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), 2765 DAG.getConstant(31, MVT::i32)); 2766 2767 // reinterpret as a i128 (SHUFB requires it). This gets lowered away. 
2768 SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 2769 dl, Op0VT, Op0, 2770 DAG.getTargetConstant( 2771 SPU::GPRCRegClass.getID(), 2772 MVT::i32)), 0); 2773 // Shuffle bytes - Copy the sign bits into the upper 64 bits 2774 // and the input value into the lower 64 bits. 2775 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, 2776 extended, sraVal, shufMask); 2777 return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle); 2778 } 2779 2780 //! Custom (target-specific) lowering entry point 2781 /*! 2782 This is where LLVM's DAG selection process calls to do target-specific 2783 lowering of nodes. 2784 */ 2785 SDValue 2786 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 2787 { 2788 unsigned Opc = (unsigned) Op.getOpcode(); 2789 EVT VT = Op.getValueType(); 2790 2791 switch (Opc) { 2792 default: { 2793 #ifndef NDEBUG 2794 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; 2795 errs() << "Op.getOpcode() = " << Opc << "\n"; 2796 errs() << "*Op.getNode():\n"; 2797 Op.getNode()->dump(); 2798 #endif 2799 llvm_unreachable(0); 2800 } 2801 case ISD::LOAD: 2802 case ISD::EXTLOAD: 2803 case ISD::SEXTLOAD: 2804 case ISD::ZEXTLOAD: 2805 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); 2806 case ISD::STORE: 2807 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); 2808 case ISD::ConstantPool: 2809 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); 2810 case ISD::GlobalAddress: 2811 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); 2812 case ISD::JumpTable: 2813 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); 2814 case ISD::ConstantFP: 2815 return LowerConstantFP(Op, DAG); 2816 2817 // i8, i64 math ops: 2818 case ISD::ADD: 2819 case ISD::SUB: 2820 case ISD::ROTR: 2821 case ISD::ROTL: 2822 case ISD::SRL: 2823 case ISD::SHL: 2824 case ISD::SRA: { 2825 if (VT == MVT::i8) 2826 return LowerI8Math(Op, DAG, Opc, *this); 2827 break; 2828 } 2829 2830 case ISD::FP_TO_SINT: 2831 case ISD::FP_TO_UINT: 2832 return LowerFP_TO_INT(Op, DAG, *this); 2833 2834 case ISD::SINT_TO_FP: 2835 case ISD::UINT_TO_FP: 2836 return LowerINT_TO_FP(Op, DAG, *this); 2837 2838 // Vector-related lowering. 
2839 case ISD::BUILD_VECTOR: 2840 return LowerBUILD_VECTOR(Op, DAG); 2841 case ISD::SCALAR_TO_VECTOR: 2842 return LowerSCALAR_TO_VECTOR(Op, DAG); 2843 case ISD::VECTOR_SHUFFLE: 2844 return LowerVECTOR_SHUFFLE(Op, DAG); 2845 case ISD::EXTRACT_VECTOR_ELT: 2846 return LowerEXTRACT_VECTOR_ELT(Op, DAG); 2847 case ISD::INSERT_VECTOR_ELT: 2848 return LowerINSERT_VECTOR_ELT(Op, DAG); 2849 2850 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: 2851 case ISD::AND: 2852 case ISD::OR: 2853 case ISD::XOR: 2854 return LowerByteImmed(Op, DAG); 2855 2856 // Vector and i8 multiply: 2857 case ISD::MUL: 2858 if (VT == MVT::i8) 2859 return LowerI8Math(Op, DAG, Opc, *this); 2860 2861 case ISD::CTPOP: 2862 return LowerCTPOP(Op, DAG); 2863 2864 case ISD::SELECT_CC: 2865 return LowerSELECT_CC(Op, DAG, *this); 2866 2867 case ISD::SETCC: 2868 return LowerSETCC(Op, DAG, *this); 2869 2870 case ISD::TRUNCATE: 2871 return LowerTRUNCATE(Op, DAG); 2872 2873 case ISD::SIGN_EXTEND: 2874 return LowerSIGN_EXTEND(Op, DAG); 2875 } 2876 2877 return SDValue(); 2878 } 2879 2880 void SPUTargetLowering::ReplaceNodeResults(SDNode *N, 2881 SmallVectorImpl<SDValue>&Results, 2882 SelectionDAG &DAG) const 2883 { 2884 #if 0 2885 unsigned Opc = (unsigned) N->getOpcode(); 2886 EVT OpVT = N->getValueType(0); 2887 2888 switch (Opc) { 2889 default: { 2890 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; 2891 errs() << "Op.getOpcode() = " << Opc << "\n"; 2892 errs() << "*Op.getNode():\n"; 2893 N->dump(); 2894 abort(); 2895 /*NOTREACHED*/ 2896 } 2897 } 2898 #endif 2899 2900 /* Otherwise, return unchanged */ 2901 } 2902 2903 //===----------------------------------------------------------------------===// 2904 // Target Optimization Hooks 2905 //===----------------------------------------------------------------------===// 2906 2907 SDValue 2908 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const 2909 { 2910 #if 0 2911 TargetMachine &TM = getTargetMachine(); 2912 #endif 2913 const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); 2914 SelectionDAG &DAG = DCI.DAG; 2915 SDValue Op0 = N->getOperand(0); // everything has at least one operand 2916 EVT NodeVT = N->getValueType(0); // The node's value type 2917 EVT Op0VT = Op0.getValueType(); // The first operand's result 2918 SDValue Result; // Initially, empty result 2919 DebugLoc dl = N->getDebugLoc(); 2920 2921 switch (N->getOpcode()) { 2922 default: break; 2923 case ISD::ADD: { 2924 SDValue Op1 = N->getOperand(1); 2925 2926 if (Op0.getOpcode() == SPUISD::IndirectAddr 2927 || Op1.getOpcode() == SPUISD::IndirectAddr) { 2928 // Normalize the operands to reduce repeated code 2929 SDValue IndirectArg = Op0, AddArg = Op1; 2930 2931 if (Op1.getOpcode() == SPUISD::IndirectAddr) { 2932 IndirectArg = Op1; 2933 AddArg = Op0; 2934 } 2935 2936 if (isa<ConstantSDNode>(AddArg)) { 2937 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); 2938 SDValue IndOp1 = IndirectArg.getOperand(1); 2939 2940 if (CN0->isNullValue()) { 2941 // (add (SPUindirect <arg>, <arg>), 0) -> 2942 // (SPUindirect <arg>, <arg>) 2943 2944 #if !defined(NDEBUG) 2945 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2946 errs() << "\n" 2947 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" 2948 << "With: (SPUindirect <arg>, <arg>)\n"; 2949 } 2950 #endif 2951 2952 return IndirectArg; 2953 } else if (isa<ConstantSDNode>(IndOp1)) { 2954 // (add (SPUindirect <arg>, <const>), <const>) -> 2955 // (SPUindirect <arg>, <const + const>) 2956 ConstantSDNode *CN1 = cast<ConstantSDNode > 
(IndOp1); 2957 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); 2958 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); 2959 2960 #if !defined(NDEBUG) 2961 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2962 errs() << "\n" 2963 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() 2964 << "), " << CN0->getSExtValue() << ")\n" 2965 << "With: (SPUindirect <arg>, " 2966 << combinedConst << ")\n"; 2967 } 2968 #endif 2969 2970 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 2971 IndirectArg, combinedValue); 2972 } 2973 } 2974 } 2975 break; 2976 } 2977 case ISD::SIGN_EXTEND: 2978 case ISD::ZERO_EXTEND: 2979 case ISD::ANY_EXTEND: { 2980 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { 2981 // (any_extend (SPUextract_elt0 <arg>)) -> 2982 // (SPUextract_elt0 <arg>) 2983 // Types must match, however... 2984 #if !defined(NDEBUG) 2985 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2986 errs() << "\nReplace: "; 2987 N->dump(&DAG); 2988 errs() << "\nWith: "; 2989 Op0.getNode()->dump(&DAG); 2990 errs() << "\n"; 2991 } 2992 #endif 2993 2994 return Op0; 2995 } 2996 break; 2997 } 2998 case SPUISD::IndirectAddr: { 2999 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { 3000 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3001 if (CN != 0 && CN->isNullValue()) { 3002 // (SPUindirect (SPUaform <addr>, 0), 0) -> 3003 // (SPUaform <addr>, 0) 3004 3005 DEBUG(errs() << "Replace: "); 3006 DEBUG(N->dump(&DAG)); 3007 DEBUG(errs() << "\nWith: "); 3008 DEBUG(Op0.getNode()->dump(&DAG)); 3009 DEBUG(errs() << "\n"); 3010 3011 return Op0; 3012 } 3013 } else if (Op0.getOpcode() == ISD::ADD) { 3014 SDValue Op1 = N->getOperand(1); 3015 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { 3016 // (SPUindirect (add <arg>, <arg>), 0) -> 3017 // (SPUindirect <arg>, <arg>) 3018 if (CN1->isNullValue()) { 3019 3020 #if !defined(NDEBUG) 3021 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 3022 errs() << "\n" 3023 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" 3024 << "With: (SPUindirect <arg>, <arg>)\n"; 3025 } 3026 #endif 3027 3028 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 3029 Op0.getOperand(0), Op0.getOperand(1)); 3030 } 3031 } 3032 } 3033 break; 3034 } 3035 case SPUISD::SHL_BITS: 3036 case SPUISD::SHL_BYTES: 3037 case SPUISD::ROTBYTES_LEFT: { 3038 SDValue Op1 = N->getOperand(1); 3039 3040 // Kill degenerate vector shifts: 3041 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { 3042 if (CN->isNullValue()) { 3043 Result = Op0; 3044 } 3045 } 3046 break; 3047 } 3048 case SPUISD::PREFSLOT2VEC: { 3049 switch (Op0.getOpcode()) { 3050 default: 3051 break; 3052 case ISD::ANY_EXTEND: 3053 case ISD::ZERO_EXTEND: 3054 case ISD::SIGN_EXTEND: { 3055 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> 3056 // <arg> 3057 // but only if the SPUprefslot2vec and <arg> types match. 3058 SDValue Op00 = Op0.getOperand(0); 3059 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { 3060 SDValue Op000 = Op00.getOperand(0); 3061 if (Op000.getValueType() == NodeVT) { 3062 Result = Op000; 3063 } 3064 } 3065 break; 3066 } 3067 case SPUISD::VEC2PREFSLOT: { 3068 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> 3069 // <arg> 3070 Result = Op0.getOperand(0); 3071 break; 3072 } 3073 } 3074 break; 3075 } 3076 } 3077 3078 // Otherwise, return unchanged. 
3079 #ifndef NDEBUG 3080 if (Result.getNode()) { 3081 DEBUG(errs() << "\nReplace.SPU: "); 3082 DEBUG(N->dump(&DAG)); 3083 DEBUG(errs() << "\nWith: "); 3084 DEBUG(Result.getNode()->dump(&DAG)); 3085 DEBUG(errs() << "\n"); 3086 } 3087 #endif 3088 3089 return Result; 3090 } 3091 3092 //===----------------------------------------------------------------------===// 3093 // Inline Assembly Support 3094 //===----------------------------------------------------------------------===// 3095 3096 /// getConstraintType - Given a constraint letter, return the type of 3097 /// constraint it is for this target. 3098 SPUTargetLowering::ConstraintType 3099 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { 3100 if (ConstraintLetter.size() == 1) { 3101 switch (ConstraintLetter[0]) { 3102 default: break; 3103 case 'b': 3104 case 'r': 3105 case 'f': 3106 case 'v': 3107 case 'y': 3108 return C_RegisterClass; 3109 } 3110 } 3111 return TargetLowering::getConstraintType(ConstraintLetter); 3112 } 3113 3114 /// Examine constraint type and operand type and determine a weight value. 3115 /// This object must already have been set up with the operand type 3116 /// and the current alternative constraint selected. 3117 TargetLowering::ConstraintWeight 3118 SPUTargetLowering::getSingleConstraintMatchWeight( 3119 AsmOperandInfo &info, const char *constraint) const { 3120 ConstraintWeight weight = CW_Invalid; 3121 Value *CallOperandVal = info.CallOperandVal; 3122 // If we don't have a value, we can't do a match, 3123 // but allow it at the lowest weight. 3124 if (CallOperandVal == NULL) 3125 return CW_Default; 3126 // Look at the constraint type. 3127 switch (*constraint) { 3128 default: 3129 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 3130 break; 3131 //FIXME: Seems like the supported constraint letters were just copied 3132 // from PPC, as the following doesn't correspond to the GCC docs. 3133 // I'm leaving it so until someone adds the corresponding lowering support. 3134 case 'b': 3135 case 'r': 3136 case 'f': 3137 case 'd': 3138 case 'v': 3139 case 'y': 3140 weight = CW_Register; 3141 break; 3142 } 3143 return weight; 3144 } 3145 3146 std::pair<unsigned, const TargetRegisterClass*> 3147 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 3148 EVT VT) const 3149 { 3150 if (Constraint.size() == 1) { 3151 // GCC RS6000 Constraint Letters 3152 switch (Constraint[0]) { 3153 case 'b': // R1-R31 3154 case 'r': // R0-R31 3155 if (VT == MVT::i64) 3156 return std::make_pair(0U, SPU::R64CRegisterClass); 3157 return std::make_pair(0U, SPU::R32CRegisterClass); 3158 case 'f': 3159 if (VT == MVT::f32) 3160 return std::make_pair(0U, SPU::R32FPRegisterClass); 3161 else if (VT == MVT::f64) 3162 return std::make_pair(0U, SPU::R64FPRegisterClass); 3163 break; 3164 case 'v': 3165 return std::make_pair(0U, SPU::GPRCRegisterClass); 3166 } 3167 } 3168 3169 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 3170 } 3171 3172 //! 
Compute used/known bits for a SPU operand 3173 void 3174 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 3175 const APInt &Mask, 3176 APInt &KnownZero, 3177 APInt &KnownOne, 3178 const SelectionDAG &DAG, 3179 unsigned Depth ) const { 3180 #if 0 3181 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; 3182 3183 switch (Op.getOpcode()) { 3184 default: 3185 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 3186 break; 3187 case CALL: 3188 case SHUFB: 3189 case SHUFFLE_MASK: 3190 case CNTB: 3191 case SPUISD::PREFSLOT2VEC: 3192 case SPUISD::LDRESULT: 3193 case SPUISD::VEC2PREFSLOT: 3194 case SPUISD::SHLQUAD_L_BITS: 3195 case SPUISD::SHLQUAD_L_BYTES: 3196 case SPUISD::VEC_ROTL: 3197 case SPUISD::VEC_ROTR: 3198 case SPUISD::ROTBYTES_LEFT: 3199 case SPUISD::SELECT_MASK: 3200 case SPUISD::SELB: 3201 } 3202 #endif 3203 } 3204 3205 unsigned 3206 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, 3207 unsigned Depth) const { 3208 switch (Op.getOpcode()) { 3209 default: 3210 return 1; 3211 3212 case ISD::SETCC: { 3213 EVT VT = Op.getValueType(); 3214 3215 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { 3216 VT = MVT::i32; 3217 } 3218 return VT.getSizeInBits(); 3219 } 3220 } 3221 } 3222 3223 // LowerAsmOperandForConstraint 3224 void 3225 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 3226 std::string &Constraint, 3227 std::vector<SDValue> &Ops, 3228 SelectionDAG &DAG) const { 3229 // Default, for the time being, to the base class handler 3230 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 3231 } 3232 3233 /// isLegalAddressImmediate - Return true if the integer value can be used 3234 /// as the offset of the target addressing mode. 3235 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, 3236 Type *Ty) const { 3237 // SPU's addresses are 256K: 3238 return (V > -(1 << 18) && V < (1 << 18) - 1); 3239 } 3240 3241 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 3242 return false; 3243 } 3244 3245 bool 3246 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 3247 // The SPU target isn't yet aware of offsets. 3248 return false; 3249 } 3250 3251 // can we compare to Imm without writing it into a register? 3252 bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 3253 //ceqi, cgti, etc. all take s10 operand 3254 return isInt<10>(Imm); 3255 } 3256 3257 bool 3258 SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, 3259 Type * ) const{ 3260 3261 // A-form: 18bit absolute address. 3262 if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0) 3263 return true; 3264 3265 // D-form: reg + 14bit offset 3266 if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs)) 3267 return true; 3268 3269 // X-form: reg+reg 3270 if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0) 3271 return true; 3272 3273 return false; 3274 } 3275