1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief This is the parent TargetLowering class for hardware code gen 12 /// targets. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUISelLowering.h" 17 #include "AMDGPURegisterInfo.h" 18 #include "AMDILIntrinsicInfo.h" 19 #include "AMDGPUSubtarget.h" 20 #include "llvm/CodeGen/CallingConvLower.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25 26 using namespace llvm; 27 28 #include "AMDGPUGenCallingConv.inc" 29 30 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 31 TargetLowering(TM, new TargetLoweringObjectFileELF()) { 32 33 // Initialize target lowering borrowed from AMDIL 34 InitAMDILLowering(); 35 36 // We need to custom lower some of the intrinsics 37 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 38 39 // Library functions. These default to Expand, but we have instructions 40 // for them. 41 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 42 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 43 setOperationAction(ISD::FPOW, MVT::f32, Legal); 44 setOperationAction(ISD::FLOG2, MVT::f32, Legal); 45 setOperationAction(ISD::FABS, MVT::f32, Legal); 46 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 47 setOperationAction(ISD::FRINT, MVT::f32, Legal); 48 49 // Lower floating point store/load to integer store/load to reduce the number 50 // of patterns in tablegen. 51 setOperationAction(ISD::STORE, MVT::f32, Promote); 52 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 53 54 setOperationAction(ISD::STORE, MVT::v4f32, Promote); 55 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 56 57 setOperationAction(ISD::LOAD, MVT::f32, Promote); 58 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 59 60 setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 61 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 62 63 setOperationAction(ISD::UDIV, MVT::i32, Expand); 64 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 65 setOperationAction(ISD::UREM, MVT::i32, Expand); 66 } 67 68 //===---------------------------------------------------------------------===// 69 // TargetLowering Callbacks 70 //===---------------------------------------------------------------------===// 71 72 void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 73 const SmallVectorImpl<ISD::InputArg> &Ins) const { 74 75 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 76 } 77 78 SDValue AMDGPUTargetLowering::LowerReturn( 79 SDValue Chain, 80 CallingConv::ID CallConv, 81 bool isVarArg, 82 const SmallVectorImpl<ISD::OutputArg> &Outs, 83 const SmallVectorImpl<SDValue> &OutVals, 84 DebugLoc DL, SelectionDAG &DAG) const { 85 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 86 } 87 88 //===---------------------------------------------------------------------===// 89 // Target specific lowering 90 //===---------------------------------------------------------------------===// 91 92 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 93 const { 94 switch (Op.getOpcode()) { 95 default: 96 Op.getNode()->dump(); 97 assert(0 && "Custom lowering code for this" 98 "instruction is not implemented yet!"); 99 break; 100 // AMDIL DAG lowering 101 case ISD::SDIV: return LowerSDIV(Op, DAG); 102 case ISD::SREM: return LowerSREM(Op, DAG); 103 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 104 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 105 // AMDGPU DAG lowering 106 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 107 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 108 } 109 return Op; 110 } 111 112 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 113 SelectionDAG &DAG) const { 114 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 115 DebugLoc DL = Op.getDebugLoc(); 116 EVT VT = Op.getValueType(); 117 118 switch (IntrinsicID) { 119 default: return Op; 120 case AMDGPUIntrinsic::AMDIL_abs: 121 return LowerIntrinsicIABS(Op, DAG); 122 case AMDGPUIntrinsic::AMDIL_exp: 123 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 124 case AMDGPUIntrinsic::AMDGPU_lrp: 125 return LowerIntrinsicLRP(Op, DAG); 126 case AMDGPUIntrinsic::AMDIL_fraction: 127 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 128 case AMDGPUIntrinsic::AMDIL_max: 129 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 130 Op.getOperand(2)); 131 case AMDGPUIntrinsic::AMDGPU_imax: 132 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 133 Op.getOperand(2)); 134 case AMDGPUIntrinsic::AMDGPU_umax: 135 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 136 Op.getOperand(2)); 137 case AMDGPUIntrinsic::AMDIL_min: 138 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 139 Op.getOperand(2)); 140 case AMDGPUIntrinsic::AMDGPU_imin: 141 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 142 Op.getOperand(2)); 143 case AMDGPUIntrinsic::AMDGPU_umin: 144 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 145 Op.getOperand(2)); 146 case AMDGPUIntrinsic::AMDIL_round_nearest: 147 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 148 } 149 } 150 151 ///IABS(a) = SMAX(sub(0, a), a) 152 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 153 SelectionDAG &DAG) const { 154 155 DebugLoc DL = Op.getDebugLoc(); 156 EVT VT = Op.getValueType(); 157 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 158 Op.getOperand(1)); 159 160 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 161 } 162 163 /// Linear Interpolation 164 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 165 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 166 SelectionDAG &DAG) const { 167 DebugLoc DL = Op.getDebugLoc(); 168 EVT VT = Op.getValueType(); 169 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 170 DAG.getConstantFP(1.0f, MVT::f32), 171 Op.getOperand(1)); 172 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 173 Op.getOperand(3)); 174 return DAG.getNode(ISD::FADD, DL, VT, 175 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 176 OneSubAC); 177 } 178 179 /// \brief Generate Min/Max node 180 SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 181 SelectionDAG &DAG) const { 182 DebugLoc DL = Op.getDebugLoc(); 183 EVT VT = Op.getValueType(); 184 185 SDValue LHS = Op.getOperand(0); 186 SDValue RHS = Op.getOperand(1); 187 SDValue True = Op.getOperand(2); 188 SDValue False = Op.getOperand(3); 189 SDValue CC = Op.getOperand(4); 190 191 if (VT != MVT::f32 || 192 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 193 return SDValue(); 194 } 195 196 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 197 switch (CCOpcode) { 198 case ISD::SETOEQ: 199 case ISD::SETONE: 200 case ISD::SETUNE: 201 case ISD::SETNE: 202 case ISD::SETUEQ: 203 case ISD::SETEQ: 204 case ISD::SETFALSE: 205 case ISD::SETFALSE2: 206 case ISD::SETTRUE: 207 case ISD::SETTRUE2: 208 case ISD::SETUO: 209 case ISD::SETO: 210 assert(0 && "Operation should already be optimised !"); 211 case ISD::SETULE: 212 case ISD::SETULT: 213 case ISD::SETOLE: 214 case ISD::SETOLT: 215 case ISD::SETLE: 216 case ISD::SETLT: { 217 if (LHS == True) 218 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 219 else 220 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 221 } 222 case ISD::SETGT: 223 case ISD::SETGE: 224 case ISD::SETUGE: 225 case ISD::SETOGE: 226 case ISD::SETUGT: 227 case ISD::SETOGT: { 228 if (LHS == True) 229 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 230 else 231 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 232 } 233 case ISD::SETCC_INVALID: 234 assert(0 && "Invalid setcc condcode !"); 235 } 236 return Op; 237 } 238 239 240 241 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 242 SelectionDAG &DAG) const { 243 DebugLoc DL = Op.getDebugLoc(); 244 EVT VT = Op.getValueType(); 245 246 SDValue Num = Op.getOperand(0); 247 SDValue Den = Op.getOperand(1); 248 249 SmallVector<SDValue, 8> Results; 250 251 // RCP = URECIP(Den) = 2^32 / Den + e 252 // e is rounding error. 253 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 254 255 // RCP_LO = umulo(RCP, Den) */ 256 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 257 258 // RCP_HI = mulhu (RCP, Den) */ 259 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 260 261 // NEG_RCP_LO = -RCP_LO 262 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 263 RCP_LO); 264 265 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) 266 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 267 NEG_RCP_LO, RCP_LO, 268 ISD::SETEQ); 269 // Calculate the rounding error from the URECIP instruction 270 // E = mulhu(ABS_RCP_LO, RCP) 271 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 272 273 // RCP_A_E = RCP + E 274 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 275 276 // RCP_S_E = RCP - E 277 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 278 279 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 280 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 281 RCP_A_E, RCP_S_E, 282 ISD::SETEQ); 283 // Quotient = mulhu(Tmp0, Num) 284 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 285 286 // Num_S_Remainder = Quotient * Den 287 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 288 289 // Remainder = Num - Num_S_Remainder 290 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 291 292 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 293 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 294 DAG.getConstant(-1, VT), 295 DAG.getConstant(0, VT), 296 ISD::SETGE); 297 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) 298 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, 299 DAG.getConstant(0, VT), 300 DAG.getConstant(-1, VT), 301 DAG.getConstant(0, VT), 302 ISD::SETGE); 303 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 304 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 305 Remainder_GE_Zero); 306 307 // Calculate Division result: 308 309 // Quotient_A_One = Quotient + 1 310 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 311 DAG.getConstant(1, VT)); 312 313 // Quotient_S_One = Quotient - 1 314 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 315 DAG.getConstant(1, VT)); 316 317 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) 318 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 319 Quotient, Quotient_A_One, ISD::SETEQ); 320 321 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 322 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 323 Quotient_S_One, Div, ISD::SETEQ); 324 325 // Calculate Rem result: 326 327 // Remainder_S_Den = Remainder - Den 328 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 329 330 // Remainder_A_Den = Remainder + Den 331 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 332 333 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 334 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 335 Remainder, Remainder_S_Den, ISD::SETEQ); 336 337 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 338 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 339 Remainder_A_Den, Rem, ISD::SETEQ); 340 SDValue Ops[2]; 341 Ops[0] = Div; 342 Ops[1] = Rem; 343 return DAG.getMergeValues(Ops, 2, DL); 344 } 345 346 //===----------------------------------------------------------------------===// 347 // Helper functions 348 //===----------------------------------------------------------------------===// 349 350 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 351 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 352 return CFP->isExactlyValue(1.0); 353 } 354 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 355 return C->isAllOnesValue(); 356 } 357 return false; 358 } 359 360 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 361 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 362 return CFP->getValueAPF().isZero(); 363 } 364 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 365 return C->isNullValue(); 366 } 367 return false; 368 } 369 370 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 371 const TargetRegisterClass *RC, 372 unsigned Reg, EVT VT) const { 373 MachineFunction &MF = DAG.getMachineFunction(); 374 MachineRegisterInfo &MRI = MF.getRegInfo(); 375 unsigned VirtualRegister; 376 if (!MRI.isLiveIn(Reg)) { 377 VirtualRegister = MRI.createVirtualRegister(RC); 378 MRI.addLiveIn(Reg, VirtualRegister); 379 } else { 380 VirtualRegister = MRI.getLiveInVirtReg(Reg); 381 } 382 return DAG.getRegister(VirtualRegister, VT); 383 } 384 385 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 386 387 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 388 switch (Opcode) { 389 default: return 0; 390 // AMDIL DAG nodes 391 NODE_NAME_CASE(CALL); 392 NODE_NAME_CASE(UMUL); 393 NODE_NAME_CASE(DIV_INF); 394 NODE_NAME_CASE(RET_FLAG); 395 NODE_NAME_CASE(BRANCH_COND); 396 397 // AMDGPU DAG nodes 398 NODE_NAME_CASE(DWORDADDR) 399 NODE_NAME_CASE(FRACT) 400 NODE_NAME_CASE(FMAX) 401 NODE_NAME_CASE(SMAX) 402 NODE_NAME_CASE(UMAX) 403 NODE_NAME_CASE(FMIN) 404 NODE_NAME_CASE(SMIN) 405 NODE_NAME_CASE(UMIN) 406 NODE_NAME_CASE(URECIP) 407 NODE_NAME_CASE(EXPORT) 408 NODE_NAME_CASE(CONST_ADDRESS) 409 NODE_NAME_CASE(REGISTER_LOAD) 410 NODE_NAME_CASE(REGISTER_STORE) 411 } 412 } 413