1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief This is the parent TargetLowering class for hardware code gen 12 /// targets. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUISelLowering.h" 17 #include "AMDGPU.h" 18 #include "AMDGPURegisterInfo.h" 19 #include "AMDGPUSubtarget.h" 20 #include "AMDILIntrinsicInfo.h" 21 #include "R600MachineFunctionInfo.h" 22 #include "SIMachineFunctionInfo.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/SelectionDAG.h" 27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28 #include "llvm/IR/DataLayout.h" 29 30 using namespace llvm; 31 32 #include "AMDGPUGenCallingConv.inc" 33 34 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 35 TargetLowering(TM, new TargetLoweringObjectFileELF()) { 36 37 // Initialize target lowering borrowed from AMDIL 38 InitAMDILLowering(); 39 40 // We need to custom lower some of the intrinsics 41 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 42 43 // Library functions. These default to Expand, but we have instructions 44 // for them. 
45 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 46 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 47 setOperationAction(ISD::FPOW, MVT::f32, Legal); 48 setOperationAction(ISD::FLOG2, MVT::f32, Legal); 49 setOperationAction(ISD::FABS, MVT::f32, Legal); 50 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 51 setOperationAction(ISD::FRINT, MVT::f32, Legal); 52 53 // The hardware supports ROTR, but not ROTL 54 setOperationAction(ISD::ROTL, MVT::i32, Expand); 55 56 // Lower floating point store/load to integer store/load to reduce the number 57 // of patterns in tablegen. 58 setOperationAction(ISD::STORE, MVT::f32, Promote); 59 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 60 61 setOperationAction(ISD::STORE, MVT::v2f32, Promote); 62 AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); 63 64 setOperationAction(ISD::STORE, MVT::v4f32, Promote); 65 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 66 67 setOperationAction(ISD::STORE, MVT::f64, Promote); 68 AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); 69 70 setOperationAction(ISD::LOAD, MVT::f32, Promote); 71 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 72 73 setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 74 AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); 75 76 setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 77 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 78 79 setOperationAction(ISD::LOAD, MVT::f64, Promote); 80 AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); 81 82 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand); 83 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand); 84 85 setOperationAction(ISD::FNEG, MVT::v2f32, Expand); 86 setOperationAction(ISD::FNEG, MVT::v4f32, Expand); 87 88 setOperationAction(ISD::MUL, MVT::i64, Expand); 89 90 setOperationAction(ISD::UDIV, MVT::i32, Expand); 91 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 92 setOperationAction(ISD::UREM, MVT::i32, Expand); 93 setOperationAction(ISD::VSELECT, MVT::v2f32, 
Expand); 94 setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); 95 96 static const int types[] = { 97 (int)MVT::v2i32, 98 (int)MVT::v4i32 99 }; 100 const size_t NumTypes = array_lengthof(types); 101 102 for (unsigned int x = 0; x < NumTypes; ++x) { 103 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 104 //Expand the following operations for the current type by default 105 setOperationAction(ISD::ADD, VT, Expand); 106 setOperationAction(ISD::AND, VT, Expand); 107 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 108 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 109 setOperationAction(ISD::MUL, VT, Expand); 110 setOperationAction(ISD::OR, VT, Expand); 111 setOperationAction(ISD::SHL, VT, Expand); 112 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 113 setOperationAction(ISD::SRL, VT, Expand); 114 setOperationAction(ISD::SRA, VT, Expand); 115 setOperationAction(ISD::SUB, VT, Expand); 116 setOperationAction(ISD::UDIV, VT, Expand); 117 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 118 setOperationAction(ISD::UREM, VT, Expand); 119 setOperationAction(ISD::VSELECT, VT, Expand); 120 setOperationAction(ISD::XOR, VT, Expand); 121 } 122 } 123 124 //===----------------------------------------------------------------------===// 125 // Target Information 126 //===----------------------------------------------------------------------===// 127 128 MVT AMDGPUTargetLowering::getVectorIdxTy() const { 129 return MVT::i32; 130 } 131 132 133 //===---------------------------------------------------------------------===// 134 // Target Properties 135 //===---------------------------------------------------------------------===// 136 137 bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 138 assert(VT.isFloatingPoint()); 139 return VT == MVT::f32; 140 } 141 142 bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 143 assert(VT.isFloatingPoint()); 144 return VT == MVT::f32; 145 } 146 147 //===---------------------------------------------------------------------===// 
148 // TargetLowering Callbacks 149 //===---------------------------------------------------------------------===// 150 151 void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 152 const SmallVectorImpl<ISD::InputArg> &Ins) const { 153 154 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 155 } 156 157 SDValue AMDGPUTargetLowering::LowerReturn( 158 SDValue Chain, 159 CallingConv::ID CallConv, 160 bool isVarArg, 161 const SmallVectorImpl<ISD::OutputArg> &Outs, 162 const SmallVectorImpl<SDValue> &OutVals, 163 SDLoc DL, SelectionDAG &DAG) const { 164 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 165 } 166 167 //===---------------------------------------------------------------------===// 168 // Target specific lowering 169 //===---------------------------------------------------------------------===// 170 171 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 172 const { 173 switch (Op.getOpcode()) { 174 default: 175 Op.getNode()->dump(); 176 assert(0 && "Custom lowering code for this" 177 "instruction is not implemented yet!"); 178 break; 179 // AMDIL DAG lowering 180 case ISD::SDIV: return LowerSDIV(Op, DAG); 181 case ISD::SREM: return LowerSREM(Op, DAG); 182 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 183 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 184 // AMDGPU DAG lowering 185 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 186 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 187 } 188 return Op; 189 } 190 191 SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 192 SDValue Op, 193 SelectionDAG &DAG) const { 194 195 const DataLayout *TD = getTargetMachine().getDataLayout(); 196 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 197 // XXX: What does the value of G->getOffset() mean? 
198 assert(G->getOffset() == 0 && 199 "Do not know what to do with an non-zero offset"); 200 201 unsigned Offset = MFI->LDSSize; 202 const GlobalValue *GV = G->getGlobal(); 203 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 204 205 // XXX: Account for alignment? 206 MFI->LDSSize += Size; 207 208 return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32); 209 } 210 211 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 212 SelectionDAG &DAG) const { 213 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 214 SDLoc DL(Op); 215 EVT VT = Op.getValueType(); 216 217 switch (IntrinsicID) { 218 default: return Op; 219 case AMDGPUIntrinsic::AMDIL_abs: 220 return LowerIntrinsicIABS(Op, DAG); 221 case AMDGPUIntrinsic::AMDIL_exp: 222 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 223 case AMDGPUIntrinsic::AMDGPU_lrp: 224 return LowerIntrinsicLRP(Op, DAG); 225 case AMDGPUIntrinsic::AMDIL_fraction: 226 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 227 case AMDGPUIntrinsic::AMDIL_max: 228 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 229 Op.getOperand(2)); 230 case AMDGPUIntrinsic::AMDGPU_imax: 231 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 232 Op.getOperand(2)); 233 case AMDGPUIntrinsic::AMDGPU_umax: 234 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 235 Op.getOperand(2)); 236 case AMDGPUIntrinsic::AMDIL_min: 237 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 238 Op.getOperand(2)); 239 case AMDGPUIntrinsic::AMDGPU_imin: 240 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 241 Op.getOperand(2)); 242 case AMDGPUIntrinsic::AMDGPU_umin: 243 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 244 Op.getOperand(2)); 245 case AMDGPUIntrinsic::AMDIL_round_nearest: 246 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 247 } 248 } 249 250 ///IABS(a) = SMAX(sub(0, a), a) 251 
/// \brief Lower the AMDIL_abs intrinsic: |a| = smax(0 - a, a).
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  // Neg = 0 - a
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                              Op.getOperand(1));

  // |a| = smax(-a, a)
  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}

/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) = a*b + (1 - a)*c
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  // OneSubA = 1.0 - a
  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
                                DAG.getConstantFP(1.0f, MVT::f32),
                                Op.getOperand(1));
  // OneSubAC = (1.0 - a) * c
  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
                                Op.getOperand(3));
  // Result = a * b + (1.0 - a) * c
  return DAG.getNode(ISD::FADD, DL, VT,
      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
      OneSubAC);
}

/// \brief Generate Min/Max node
///
/// \p Op carries (LHS, RHS, True, False, CC) in SELECT_CC operand order.
/// Only the f32 case where the selected values are exactly the compared
/// values (in either order) is matched; otherwise an empty SDValue is
/// returned so the caller can fall back to default handling.
SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);

  // Min/max can only express "select one of the two compared f32 values".
  if (VT != MVT::f32 ||
      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
    return SDValue();
  }

  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  switch (CCOpcode) {
  case ISD::SETOEQ:
  case ISD::SETONE:
  case ISD::SETUNE:
  case ISD::SETNE:
  case ISD::SETUEQ:
  case ISD::SETEQ:
  case ISD::SETFALSE:
  case ISD::SETFALSE2:
  case ISD::SETTRUE:
  case ISD::SETTRUE2:
  case ISD::SETUO:
  case ISD::SETO:
    // These conditions cannot be expressed as min/max.
    // NOTE(review): with NDEBUG the assert disappears and control falls
    // through into the SETULE group below -- confirm that is intended.
    assert(0 && "Operation should already be optimised !");
  case ISD::SETULE:
  case ISD::SETULT:
  case ISD::SETOLE:
  case ISD::SETOLT:
  case ISD::SETLE:
  case ISD::SETLT: {
    // "less than" comparison: selecting LHS means taking the minimum.
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
  }
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETUGT:
  case ISD::SETOGT: {
    // "greater than" comparison: selecting LHS means taking the maximum.
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
  }
  case ISD::SETCC_INVALID:
    assert(0 && "Invalid setcc condcode !");
  }
  return Op;
}



/// \brief Custom-lower i32 UDIVREM (see constructor) into a sequence built
/// around the URECIP reciprocal-estimate node, with select-based corrections
/// for the estimate's rounding error.  Returns {quotient, remainder} as a
/// merged value pair.
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  // NOTE(review): 'Results' is never used below.
  SmallVector<SDValue, 8> Results;

  // RCP = URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu(RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                     RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                           NEG_RCP_LO, RCP_LO,
                                           ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction:
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                     RCP_A_E, RCP_S_E,
                                     ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num)
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                                 DAG.getConstant(-1, VT),
                                                 DAG.getConstant(0, VT),
                                                 ISD::SETGE);
  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
                                                  DAG.getConstant(0, VT),
                                                  DAG.getConstant(-1, VT),
                                                  DAG.getConstant(0, VT),
                                                  ISD::SETGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                                               Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Remainder_A_Den, Rem, ISD::SETEQ);
  SDValue Ops[2];
  Ops[0] = Div;
  Ops[1] = Rem;
  // Package quotient and remainder as the UDIVREM node's two results.
  return DAG.getMergeValues(Ops, 2, DL);
}

//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//

/// \brief Return true if \p Op is the constant the hardware treats as "true":
/// exactly 1.0 for floating point, all-ones for integers.
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    return C->isAllOnesValue();
  }
  return false;
}

/// \brief Return true if \p Op is the constant the hardware treats as
/// "false": floating-point zero (either sign) or integer zero.
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    return C->isNullValue();
  }
  return false;
}

/// \brief Return a register node for the virtual register bound to physical
/// register \p Reg as a function live-in, creating the virtual register and
/// the live-in mapping on first use.
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
    const TargetRegisterClass *RC,
                                                  unsigned Reg, EVT VT) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned VirtualRegister;
  if (!MRI.isLiveIn(Reg)) {
    // First use: create a virtual register and record the live-in copy.
    VirtualRegister = MRI.createVirtualRegister(RC);
    MRI.addLiveIn(Reg, VirtualRegister);
  } else {
    // Reuse the virtual register already associated with this live-in.
    VirtualRegister = MRI.getLiveInVirtReg(Reg);
  }
  return DAG.getRegister(VirtualRegister, VT);
}

#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;

/// \brief Map an AMDGPUISD opcode to its name for debug output; returns a
/// null pointer for opcodes not listed here.
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  // AMDIL DAG nodes
  NODE_NAME_CASE(CALL);
  NODE_NAME_CASE(UMUL);
  NODE_NAME_CASE(DIV_INF);
  NODE_NAME_CASE(RET_FLAG);
  NODE_NAME_CASE(BRANCH_COND);

  // AMDGPU DAG nodes
  NODE_NAME_CASE(DWORDADDR)
  NODE_NAME_CASE(FRACT)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(SMAX)
  NODE_NAME_CASE(UMAX)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(SMIN)
  NODE_NAME_CASE(UMIN)
  NODE_NAME_CASE(URECIP)
  NODE_NAME_CASE(EXPORT)
  NODE_NAME_CASE(CONST_ADDRESS)
  NODE_NAME_CASE(REGISTER_LOAD)
  NODE_NAME_CASE(REGISTER_STORE)
  }
}