//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

// Instruction format classes used by the definitions in this file.
include "NVPTXInstrFormats.td"

// A NOP instruction
// It has no outputs, no inputs, an empty assembly string and no selection
// pattern.
def NOP : NVPTXInst<(outs), (ins), "", []>;

// List of vector specific properties
// Each record is a distinct VecInstTypeEnum value.
// NOTE(review): VecInstTypeEnum is not defined in this file; presumably it
// comes from NVPTXInstrFormats.td -- confirm before changing any number.
def isVecLD      : VecInstTypeEnum<1>;
def isVecST      : VecInstTypeEnum<2>;
def isVecBuild   : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert  : VecInstTypeEnum<6>;
def isVecDest    : VecInstTypeEnum<7>;
def isVecOther   : VecInstTypeEnum<15>;

//===----------------------------------------------------------------------===//
// NVPTX Operand Definitions.
//===----------------------------------------------------------------------===//

// Branch target operand.
def brtarget    : Operand<OtherVT>;

// CVT conversion modes
// These must match the enum in NVPTX.h
// Values 0x0-0x8 select the base mode; each *_FTZ variant is the same value
// with bit 0x10 set.  CvtSAT is 0x20 and CvtSAT_FTZ is 0x30.
def CvtNONE : PatLeaf<(i32 0x0)>;
def CvtRNI  : PatLeaf<(i32 0x1)>;
def CvtRZI  : PatLeaf<(i32 0x2)>;
def CvtRMI  : PatLeaf<(i32 0x3)>;
def CvtRPI  : PatLeaf<(i32 0x4)>;
def CvtRN   : PatLeaf<(i32 0x5)>;
def CvtRZ   : PatLeaf<(i32 0x6)>;
def CvtRM   : PatLeaf<(i32 0x7)>;
def CvtRP   : PatLeaf<(i32 0x8)>;

def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
def CvtRNI_FTZ  : PatLeaf<(i32 0x11)>;
def CvtRZI_FTZ  : PatLeaf<(i32 0x12)>;
def CvtRMI_FTZ  : PatLeaf<(i32 0x13)>;
def CvtRPI_FTZ  : PatLeaf<(i32 0x14)>;
def CvtRN_FTZ   : PatLeaf<(i32 0x15)>;
def CvtRZ_FTZ   : PatLeaf<(i32 0x16)>;
def CvtRM_FTZ   : PatLeaf<(i32 0x17)>;
def CvtRP_FTZ   : PatLeaf<(i32 0x18)>;

def CvtSAT      : PatLeaf<(i32 0x20)>;
def CvtSAT_FTZ  : PatLeaf<(i32 0x30)>;

// Immediate operand carrying one of the Cvt* values above; it is rendered by
// the printer method named below.
def CvtMode : Operand<i32> {
  let PrintMethod = "printCvtMode";
}

// Compare modes
// These must match the enum in NVPTX.h
// Each *_FTZ variant is the base value with bit 0x100 set.
def CmpEQ  : PatLeaf<(i32 0)>;
def CmpNE  : PatLeaf<(i32 1)>;
def CmpLT  : PatLeaf<(i32 2)>;
def CmpLE  : PatLeaf<(i32 3)>;
def CmpGT  : PatLeaf<(i32 4)>;
def CmpGE  : PatLeaf<(i32 5)>;
def CmpLO  : PatLeaf<(i32 6)>;
def CmpLS  : PatLeaf<(i32 7)>;
def CmpHI  : PatLeaf<(i32 8)>;
def CmpHS  : PatLeaf<(i32 9)>;
def CmpEQU : PatLeaf<(i32 10)>;
def CmpNEU : PatLeaf<(i32 11)>;
def CmpLTU : PatLeaf<(i32 12)>;
def CmpLEU : PatLeaf<(i32 13)>;
def CmpGTU : PatLeaf<(i32 14)>;
def CmpGEU : PatLeaf<(i32 15)>;
def CmpNUM : PatLeaf<(i32 16)>;
def CmpNAN : PatLeaf<(i32 17)>;

def CmpEQ_FTZ  : PatLeaf<(i32 0x100)>;
def CmpNE_FTZ  : PatLeaf<(i32 0x101)>;
def CmpLT_FTZ  : PatLeaf<(i32 0x102)>;
def CmpLE_FTZ  : PatLeaf<(i32 0x103)>;
def CmpGT_FTZ  : PatLeaf<(i32 0x104)>;
def CmpGE_FTZ  : PatLeaf<(i32 0x105)>;
def CmpLO_FTZ  : PatLeaf<(i32 0x106)>;
def CmpLS_FTZ  : PatLeaf<(i32 0x107)>;
def CmpHI_FTZ  : PatLeaf<(i32 0x108)>;
def CmpHS_FTZ  : PatLeaf<(i32 0x109)>;
def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;

// Immediate operand carrying one of the Cmp* values above; it is rendered by
// the printer method named below.
def CmpMode : Operand<i32> {
  let PrintMethod = "printCmpMode";
}

// f32 operands whose transform replaces the matched fpimm with the constant
// 0.0 / 1.0.  The SDLoc argument is passed the same way SHL2MUL32/SHL2MUL16
// pass it to getTargetConstant further down in this file.
def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
    return CurDAG->getTargetConstantFP(0.0, SDLoc(N), MVT::f32);
  }]>;
def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
    return CurDAG->getTargetConstantFP(1.0, SDLoc(N), MVT::f32);
  }]>;

//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//

// Each predicate wraps the C++ expression given as its string; the
// expression is evaluated by the instruction selector.

def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
def useAtomRedG32forGen32 :
  Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
def useAtomRedG64forGen64 :
  Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
def hasLDG : Predicate<"Subtarget->hasLDG()">;
def hasLDU : Predicate<"Subtarget->hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">;

def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;

def doMulWide      : Predicate<"doMulWide">;

def allowFMA : Predicate<"allowFMA()">;
def noFMA : Predicate<"!allowFMA()">;

def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;

def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;

def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;

// Always-true predicate, for multiclasses that require a Predicate argument.
def true : Predicate<"1">;

def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;


//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//

// Two-operand integer instruction in register/register and register/immediate
// forms for i64, i32 and i16.  The bit width is appended to OpcStr, so OpcStr
// must end with the type letter (e.g. "add.s").
multiclass I3<string OpcStr, SDNode OpNode> {
  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
                       Int64Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                       Int16Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}

// 32-bit-only add/sub forms; ".s32" is appended to OpcStr.  Instantiated
// below for the carry-producing and carry-consuming nodes.
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
       Int32Regs:$b),
                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                        Int32Regs:$b))]>;
   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}

// Two-operand floating-point instruction with no explicit rounding modifier,
// selected only when allowFMA holds.  The f32 forms come in ".ftz" (requires
// doF32FTZ) and plain flavours.
multiclass F3<string OpcStr, SDNode OpNode> {
   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b),
                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, Float64Regs:$b))]>,
                      Requires<[allowFMA]>;
   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b),
                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, fpimm:$b))]>,
                      Requires<[allowFMA]>;
   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[allowFMA, doF32FTZ]>;
   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>,
                      Requires<[allowFMA, doF32FTZ]>;
   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[allowFMA]>;
   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>,
                      Requires<[allowFMA]>;
}

// Same shapes as F3 but with an explicit ".rn" modifier, selected only when
// noFMA holds.
multiclass F3_rn<string OpcStr, SDNode OpNode> {
   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b),
                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, Float64Regs:$b))]>,
                      Requires<[noFMA]>;
   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b),
                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, fpimm:$b))]>,
                      Requires<[noFMA]>;
   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[noFMA, doF32FTZ]>;
   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>,
                      Requires<[noFMA, doF32FTZ]>;
   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[noFMA]>;
   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>,
                      Requires<[noFMA]>;
}

// One-operand floating-point instruction: f64, f32 with ".ftz" (requires
// doF32FTZ), and plain f32.
multiclass F2<string OpcStr, SDNode OpNode> {
   def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
                      !strconcat(OpcStr, ".f64 \t$dst, $a;"),
                      [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
   def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
                      Requires<[doF32FTZ]>;
   def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                      !strconcat(OpcStr, ".f32 \t$dst, $a;"),
                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}

//===----------------------------------------------------------------------===//
// NVPTX Instructions.
//===----------------------------------------------------------------------===//

//-----------------------------------
// General Type Conversion
//-----------------------------------

let hasSideEffects = 0 in {
// Generate a cvt to the given type from all possible types.
// Each instance takes a CvtMode immediate that defines the conversion mode to
// use.  It can be CvtNONE to omit a conversion mode.
//
// Despite its name, FromName is the *destination* type: it is printed first
// in the mnemonic ("cvt<mode>.<FromName>.<src>") and RC is the class of $dst.
// The _<type> suffix of each def names the source type.  None of these carry
// a selection pattern.
multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
  def _s16 : NVPTXInst<(outs RC:$dst),
                       (ins Int16Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".s16\t$dst, $src;"),
                       []>;
  def _u16 : NVPTXInst<(outs RC:$dst),
                       (ins Int16Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".u16\t$dst, $src;"),
                       []>;
  def _f16 : NVPTXInst<(outs RC:$dst),
                       (ins Int16Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".f16\t$dst, $src;"),
                       []>;
  def _s32 : NVPTXInst<(outs RC:$dst),
                       (ins Int32Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".s32\t$dst, $src;"),
                       []>;
  def _u32 : NVPTXInst<(outs RC:$dst),
                       (ins Int32Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".u32\t$dst, $src;"),
                       []>;
  def _s64 : NVPTXInst<(outs RC:$dst),
                       (ins Int64Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".s64\t$dst, $src;"),
                       []>;
  def _u64 : NVPTXInst<(outs RC:$dst),
                       (ins Int64Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".u64\t$dst, $src;"),
                       []>;
  def _f32 : NVPTXInst<(outs RC:$dst),
                       (ins Float32Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".f32\t$dst, $src;"),
                       []>;
  def _f64 : NVPTXInst<(outs RC:$dst),
                       (ins Float64Regs:$src, CvtMode:$mode),
                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                       FromName, ".f64\t$dst, $src;"),
                       []>;
}

// Generate a cvt to all possible types.
defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;

// This set of cvt is different from the above. The type of the source
// and target are the same.
//
def CVT_INREG_s16_s8 :  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                        "cvt.s16.s8 \t$dst, $src;", []>;
def CVT_INREG_s32_s8 :  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                        "cvt.s32.s8 \t$dst, $src;", []>;
def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                        "cvt.s32.s16 \t$dst, $src;", []>;
def CVT_INREG_s64_s8 :  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                        "cvt.s64.s8 \t$dst, $src;", []>;
def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                        "cvt.s64.s16 \t$dst, $src;", []>;
def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                        "cvt.s64.s32 \t$dst, $src;", []>;
}

//-----------------------------------
// Integer Arithmetic
//-----------------------------------

// i1 add and sub are both emitted as xor.pred (addition and subtraction
// modulo 2 coincide with exclusive-or).
multiclass ADD_SUB_i1<SDNode OpNode> {
   def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
          "xor.pred \t$dst, $a, $b;",
      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
   def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
          "xor.pred \t$dst, $a, $b;",
      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}

defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;


defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;

defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;

defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;

//mul.wide PTX instruction

// Immediate that fits in a signed 32-bit integer.
def SInt32Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(32);
}]>;

// Immediate that fits in an unsigned 32-bit integer.
def UInt32Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isIntN(32);
}]>;

// Immediate that fits in a signed 16-bit integer.
def SInt16Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(16);
}]>;

// Immediate that fits in an unsigned 16-bit integer.
def UInt16Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isIntN(16);
}]>;

// Shift amount that can be turned into a 32-bit mul.wide multiplier.
// SHL2MUL32 materialises (1 << v) as an i32 immediate.  For v == 31 that
// immediate is 0x80000000, which mul.wide.s32 reads as -2^31 and therefore
// produces the wrong sign, so only 0 <= v <= 30 is accepted.
def Int5Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  // Check if 0 <= v <= 30
  // Only then (1 << v) is a positive i32 multiplier.
  return v.sge(0) && v.slt(31);
}]>;

// 16-bit counterpart of Int5Const: (1 << 15) is 0x8000, i.e. negative as an
// s16 multiplier, so only 0 <= v <= 14 is accepted.
def Int4Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  // Check if 0 <= v <= 14
  // Only then (1 << v) is a positive i16 multiplier.
  return v.sge(0) && v.slt(15);
}]>;

// Rewrite a shift amount v into the i32 multiplier (1 << v).
def SHL2MUL32 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(32, 1);
  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32);
}]>;

// Rewrite a shift amount v into the i16 multiplier (1 << v).
def SHL2MUL16 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(16, 1);
  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
}]>;

// mul.wide instruction forms.  They carry no pattern of their own; the Pat
// records that follow select them.
def MULWIDES64
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm64
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
              "mul.wide.s32 \t$dst, $a, $b;", []>;

def MULWIDEU64
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm64
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
              "mul.wide.u32 \t$dst, $a, $b;", []>;

def MULWIDES32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
              "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
              "mul.wide.s16 \t$dst, $a, $b;", []>;

def MULWIDEU32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
              "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
              "mul.wide.u16 \t$dst, $a, $b;", []>;

// (ext a) << c  ==>  mul.wide a, (1 << c), for the shift amounts accepted by
// Int5Const / Int4Const.
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
          Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
          Requires<[doMulWide]>;

def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
          Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
          Requires<[doMulWide]>;

// (ext a) * (ext b) and (ext a) * narrow-constant  ==>  mul.wide.
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
          (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
          Requires<[doMulWide]>;

def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
          (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
          Requires<[doMulWide]>;

def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
          (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
          Requires<[doMulWide]>;

def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
          (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
          Requires<[doMulWide]>;


// Target-specific widening-multiply nodes: one result, two operands of the
// same type.
def SDTMulWide
  : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
def mul_wide_signed
  : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
def mul_wide_unsigned
  : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;

def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
          (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
          Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
          (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
          Requires<[doMulWide]>;


def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
          (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
          Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
          (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
          Requires<[doMulWide]>;

defm MULT : I3<"mul.lo.s", mul>;

defm MULTHS : I3<"mul.hi.s", mulhs>;
defm MULTHU : I3<"mul.hi.u", mulhu>;

defm SDIV : I3<"div.s", sdiv>;
defm UDIV : I3<"div.u", udiv>;

defm SREM : I3<"rem.s", srem>;
// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
defm UREM : I3<"rem.u", urem>;
// The ri version will not be selected as DAGCombiner::visitUREM will lower it.

// Integer multiply-add node: one integer result and three operands, all of
// the result type.
def SDTIMAD
  : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
                         SDTCisInt<2>, SDTCisSameAs<0, 2>,
                         SDTCisSameAs<0, 3>]>;
def imad
  : SDNode<"NVPTXISD::IMAD", SDTIMAD>;

def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst,
                         (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst,
                         (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst,
                         (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
    (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst,
                         (imad Int16Regs:$a, imm:$b, imm:$c))]>;

def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst,
                         (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst,
                         (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst,
                         (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst,
                         (imad Int32Regs:$a, imm:$b, imm:$c))]>;

def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst,
                         (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst,
                         (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst,
                         (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst,
                         (imad Int64Regs:$a, imm:$b, imm:$c))]>;

def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                     "neg.s16 \t$dst, $src;",
         [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                     "neg.s32 \t$dst, $src;",
         [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                     "neg.s64 \t$dst, $src;",
         [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;

//-----------------------------------
// Floating Point Arithmetic
//-----------------------------------

// Constant 1.0f
def FloatConst1 : PatLeaf<(fpimm), [{
    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
      return false;
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;
// Constant (double)1.0
def DoubleConst1 : PatLeaf<(fpimm), [{
    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
      return false;
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;

defm FADD : F3<"add", fadd>;
defm FSUB : F3<"sub", fsub>;
defm FMUL : F3<"mul", fmul>;

defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;

defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;

//
// F64 division
//
def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
                      (ins f64imm:$a, Float64Regs:$b),
                      "rcp.rn.f64 \t$dst, $b;",
                      [(set Float64Regs:$dst,
                        (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b),
                      "div.rn.f64 \t$dst, $a, $b;",
                      [(set Float64Regs:$dst,
                        (fdiv Float64Regs:$a, Float64Regs:$b))]>;
def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b),
                      "div.rn.f64 \t$dst, $a, $b;",
                      [(set Float64Regs:$dst,
                        (fdiv Float64Regs:$a, fpimm:$b))]>;

//
// F32 Approximate reciprocal
//
def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.approx.ftz.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
                        (ins f32imm:$a, Float32Regs:$b),
                       "rcp.approx.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX]>;
//
// F32 Approximate division
//
def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.approx.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.approx.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.approx.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX]>;
def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.approx.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[do_DIVF32_APPROX]>;
//
// F32 Semi-accurate reciprocal
//
// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
//
def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.approx.ftz.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.approx.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL]>;
//
// F32 Semi-accurate division
//
def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.full.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.full.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.full.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL]>;
def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.full.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[do_DIVF32_FULL]>;
//
// F32 Accurate reciprocal
//
def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
                        (ins f32imm:$a, Float32Regs:$b),
                       "rcp.rn.ftz.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[reqPTX20, doF32FTZ]>;
def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                       "rcp.rn.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[reqPTX20]>;
//
// F32 Accurate division
//
def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.rn.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[doF32FTZ, reqPTX20]>;
def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.rn.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[doF32FTZ, reqPTX20]>;
def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.rn.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[reqPTX20]>;
def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.rn.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[reqPTX20]>;

//
// F32 rsqrt
//

def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
                       "rsqrt.approx.f32 \t$dst, $b;", []>;

// 1.0f / sqrt(b)  ==>  rsqrt.approx.f32 b, under the listed predicates.
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
         (RSQRTF32approx1r Float32Regs:$b)>,
         Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;

// f32 fused multiply-add in the four register/immediate combinations of the
// second and third operands.  OpcStr is the complete mnemonic.
multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
   def rrr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
                      Requires<[Pred]>;
   def rri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
                      Requires<[Pred]>;
   def rir : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
                      Requires<[Pred]>;
   def rii : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
                      Requires<[Pred]>;
}

// f64 counterpart of FPCONTRACT32.
multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
   def rrr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
                      Requires<[Pred]>;
   def rri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
                      Requires<[Pred]>;
   def rir : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
                      Requires<[Pred]>;
   def rii : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
                      Requires<[Pred]>;
}

defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
defm FMA32     : FPCONTRACT32<"fma.rn.f32", true>;
defm FMA64     : FPCONTRACT64<"fma.rn.f64", true>;

def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                      "sin.approx.f32 \t$dst, $src;",
                      [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                      "cos.approx.f32 \t$dst, $src;",
                      [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;

// Lower (frem x, y) into (sub x, (mul (trunc (div x, y)) y)),
// i.e. "poor man's fmod()"
//
// The quotient is rounded toward zero (CvtRZI), not toward minus infinity:
// frem must take the sign of the dividend, and a floor-based lowering gives
// the wrong sign whenever x / y is negative and not an integer
// (e.g. frem(-5.5, 2.0) must be -1.5, not 0.5).

// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
             Float32Regs:$y))>,
          Requires<[doF32FTZ]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
             fpimm:$y))>,
          Requires<[doF32FTZ]>;

// frem - f32
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
             Float32Regs:$y))>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;

// frem - f64
// NOTE(review): the f64 patterns need the same CvtRMI -> CvtRZI change as
// the f32 patterns above.
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
            (FDIV64rr
Float64Regs:$x, Float64Regs:$y), CvtRMI), 958 Float64Regs:$y))>; 959 def : Pat<(frem Float64Regs:$x, fpimm:$y), 960 (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 961 (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI), 962 fpimm:$y))>; 963 964 //----------------------------------- 965 // Logical Arithmetic 966 //----------------------------------- 967 968 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> { 969 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), 970 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), 971 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; 972 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), 973 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), 974 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; 975 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), 976 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), 977 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 978 Int16Regs:$b))]>; 979 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), 980 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), 981 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; 982 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), 983 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), 984 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 985 Int32Regs:$b))]>; 986 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 987 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), 988 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 989 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), 990 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), 991 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 992 Int64Regs:$b))]>; 993 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), 994 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), 995 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; 996 } 997 998 defm OR : LOG_FORMAT<"or", or>; 999 defm 
AND : LOG_FORMAT<"and", and>; 1000 defm XOR : LOG_FORMAT<"xor", xor>; 1001 1002 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), 1003 "not.pred \t$dst, $src;", 1004 [(set Int1Regs:$dst, (not Int1Regs:$src))]>; 1005 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), 1006 "not.b16 \t$dst, $src;", 1007 [(set Int16Regs:$dst, (not Int16Regs:$src))]>; 1008 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), 1009 "not.b32 \t$dst, $src;", 1010 [(set Int32Regs:$dst, (not Int32Regs:$src))]>; 1011 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), 1012 "not.b64 \t$dst, $src;", 1013 [(set Int64Regs:$dst, (not Int64Regs:$src))]>; 1014 1015 // For shifts, the second src operand must be 32-bit value 1016 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> { 1017 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, 1018 Int32Regs:$b), 1019 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1020 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1021 Int32Regs:$b))]>; 1022 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), 1023 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1024 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1025 (i32 imm:$b)))]>; 1026 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, 1027 Int32Regs:$b), 1028 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1029 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1030 Int32Regs:$b))]>; 1031 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 1032 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1033 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1034 (i32 imm:$b)))]>; 1035 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), 1036 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1037 [(set Int32Regs:$dst, (OpNode (i32 imm:$a), 1038 (i32 imm:$b)))]>; 1039 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, 1040 Int32Regs:$b), 1041 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1042 [(set Int16Regs:$dst, (OpNode 
Int16Regs:$a, 1043 Int32Regs:$b))]>; 1044 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), 1045 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1046 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 1047 (i32 imm:$b)))]>; 1048 } 1049 1050 defm SHL : LSHIFT_FORMAT<"shl.b", shl>; 1051 1052 // For shifts, the second src operand must be 32-bit value 1053 // Need to add cvt for the 8-bits. 1054 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> { 1055 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, 1056 Int32Regs:$b), 1057 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1058 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1059 Int32Regs:$b))]>; 1060 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), 1061 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1062 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1063 (i32 imm:$b)))]>; 1064 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, 1065 Int32Regs:$b), 1066 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1067 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1068 Int32Regs:$b))]>; 1069 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 1070 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1071 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1072 (i32 imm:$b)))]>; 1073 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), 1074 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1075 [(set Int32Regs:$dst, (OpNode (i32 imm:$a), 1076 (i32 imm:$b)))]>; 1077 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, 1078 Int32Regs:$b), 1079 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1080 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 1081 Int32Regs:$b))]>; 1082 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), 1083 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1084 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 1085 (i32 imm:$b)))]>; 1086 } 1087 1088 defm SRA : RSHIFT_FORMAT<"shr.s", sra>; 1089 defm SRL : RSHIFT_FORMAT<"shr.u", srl>; 1090 1091 
//
// Rotate: use ptx shf instruction if available.
//

// 32 bit r2 = rotl r1, n
//    =>
//        r2 = shf.l r1, r1, n
def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
                             (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
                             (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;

// 32 bit r2 = rotr r1, n
//    =>
//        r2 = shf.r r1, r1, n
def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
                             (ins Int32Regs:$src, i32imm:$amt),
              "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
                             (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;

//
// Rotate: if ptx shf instruction is not available, then use shift+add
//
// 32bit
// Immediate rotate: $amt1 is the left-shift amount and $amt2 the right-shift
// amount.  The instruction has no pattern of its own; the Pats below supply
// the two amounts (n and 32-n) for rotl and rotr.
def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
  (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
    !strconcat("{{\n\t",
               ".reg .b32 %lhs;\n\t",
               ".reg .b32 %rhs;\n\t",
               "shl.b32 \t%lhs, $src, $amt1;\n\t",
               "shr.b32 \t%rhs, $src, $amt2;\n\t",
               "add.u32 \t$dst, %lhs, %rhs;\n\t",
               "}}"),
  []>;

// Transforms an immediate n into the target constant (32 - n), typed i32.
def SUB_FRM_32 : SDNodeXForm<imm, [{
    return CurDAG->getTargetConstant(32-N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
    Int32Regs:$amt),
    !strconcat("{{\n\t",
               ".reg .b32 %lhs;\n\t",
               ".reg .b32 %rhs;\n\t",
               ".reg .b32 %amt2;\n\t",
               "shl.b32 \t%lhs, $src, $amt;\n\t",
               "sub.s32 \t%amt2, 32, $amt;\n\t",
               "shr.b32 \t%rhs, $src, %amt2;\n\t",
               "add.u32 \t$dst, %lhs, %rhs;\n\t",
               "}}"),
    [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[noHWROT32]>;

def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
    Int32Regs:$amt),
    !strconcat("{{\n\t",
               ".reg .b32 %lhs;\n\t",
               ".reg .b32 %rhs;\n\t",
               ".reg .b32 %amt2;\n\t",
               "shr.b32 \t%lhs, $src, $amt;\n\t",
               "sub.s32 \t%amt2, 32, $amt;\n\t",
               "shl.b32 \t%rhs, $src, %amt2;\n\t",
               "add.u32 \t$dst, %lhs, %rhs;\n\t",
               "}}"),
    [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[noHWROT32]>;

// 64bit
def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
    i32imm:$amt1, i32imm:$amt2),
    !strconcat("{{\n\t",
               ".reg .b64 %lhs;\n\t",
               ".reg .b64 %rhs;\n\t",
               "shl.b64 \t%lhs, $src, $amt1;\n\t",
               "shr.b64 \t%rhs, $src, $amt2;\n\t",
               "add.u64 \t$dst, %lhs, %rhs;\n\t",
               "}}"),
  []>;

// Transforms an immediate n into the target constant (64 - n), typed i32.
def SUB_FRM_64 : SDNodeXForm<imm, [{
    return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;

def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
    Int32Regs:$amt),
    !strconcat("{{\n\t",
               ".reg .b64 %lhs;\n\t",
               ".reg .b64 %rhs;\n\t",
               ".reg .u32 %amt2;\n\t",
               "shl.b64 \t%lhs, $src, $amt;\n\t",
               "sub.u32 \t%amt2, 64, $amt;\n\t",
               "shr.b64 \t%rhs, $src, %amt2;\n\t",
               "add.u64 \t$dst, %lhs, %rhs;\n\t",
               "}}"),
    [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;

def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
    Int32Regs:$amt),
    !strconcat("{{\n\t",
               ".reg .b64 %lhs;\n\t",
               ".reg .b64 %rhs;\n\t",
               ".reg .u32 %amt2;\n\t",
               "shr.b64 \t%lhs, $src, $amt;\n\t",
               "sub.u32 \t%amt2, 64, $amt;\n\t",
               "shl.b64 \t%rhs, $src, %amt2;\n\t",
               "add.u64 \t$dst, %lhs, %rhs;\n\t",
               "}}"),
    [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;

// BFE - bit-field extract

multiclass BFE<string TyStr, RegisterClass RC> {
  // BFE supports both 32-bit and 64-bit values, but the start and length
  // operands are always 32-bit
  def rrr
    : NVPTXInst<(outs RC:$d),
                (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
                !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
  def rri
    : NVPTXInst<(outs RC:$d),
                (ins RC:$a, Int32Regs:$b, i32imm:$c),
                !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
  def rii
    : NVPTXInst<(outs RC:$d),
                (ins RC:$a, i32imm:$b, i32imm:$c),
                !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
}

defm BFE_S32 : BFE<"s32", Int32Regs>;
defm BFE_U32 : BFE<"u32", Int32Regs>;
defm BFE_S64 : BFE<"s64", Int64Regs>;
defm BFE_U64 : BFE<"u64", Int64Regs>;

//-----------------------------------
// General Comparison
//-----------------------------------

// General setp instructions
// Result is a predicate (Int1Regs).  No patterns are attached here; the
// comparison patterns further down select these definitions explicitly and
// pass the comparison kind as the CmpMode operand.
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
  def rr : NVPTXInst<(outs Int1Regs:$dst),
                     (ins RC:$a, RC:$b, CmpMode:$cmp),
            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
                     []>;
  def ri : NVPTXInst<(outs Int1Regs:$dst),
                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
                     []>;
  def ir : NVPTXInst<(outs Int1Regs:$dst),
                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
                     []>;
}

defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;

// General set instructions
// Same operand forms as SETP, but the result is written to an Int32Regs
// register instead of a predicate.
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
  def rr : NVPTXInst<(outs Int32Regs:$dst),
                     (ins RC:$a, RC:$b, CmpMode:$cmp),
                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
  def ri : NVPTXInst<(outs Int32Regs:$dst),
                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
  def ir : NVPTXInst<(outs Int32Regs:$dst),
                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
}

defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;

//-----------------------------------
// General Selection
//-----------------------------------

// General selp instructions
// Pattern-less variant: $dst = $p ? $a : $b in rr/ri/ir/ii operand forms.
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
  def rr : NVPTXInst<(outs RC:$dst),
                     (ins RC:$a, RC:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
  def ri : NVPTXInst<(outs RC:$dst),
                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
  def ir : NVPTXInst<(outs RC:$dst),
                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
  def ii : NVPTXInst<(outs RC:$dst),
                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
}

// Same instructions as SELP, but each form also carries a `select` pattern.
// ImmNode is the DAG leaf used to match immediates (imm or fpimm).
multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
                        SDNode ImmNode> {
  def rr : NVPTXInst<(outs RC:$dst),
                     (ins RC:$a, RC:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
  def ri : NVPTXInst<(outs RC:$dst),
                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
  def ir : NVPTXInst<(outs RC:$dst),
                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                     [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
  def ii : NVPTXInst<(outs RC:$dst),
                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                     [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
}

defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>;
defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;

//
// Funnel shift in clamp mode
//
// - SDNodes are created so they can be used in the DAG code,
//   e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
//
def SDTIntShiftDOp: SDTypeProfile<1, 3,
                                  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                   SDTCisInt<0>, SDTCisInt<3>]>;
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;

def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
                             (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                  "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
                  [(set Int32Regs:$dst,
                     (FUN_SHFL_CLAMP Int32Regs:$lo,
                        Int32Regs:$hi, Int32Regs:$amt))]>;

def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
                             (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                  "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
                  [(set Int32Regs:$dst,
                     (FUN_SHFR_CLAMP Int32Regs:$lo,
                        Int32Regs:$hi, Int32Regs:$amt))]>;

//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------

def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
  [SDNPWantRoot]>;
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
  [SDNPWantRoot]>;

// Memory operands made of a base register plus an immediate.
def MEMri : Operand<i32> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int32Regs, i32imm);
}
def MEMri64 : Operand<i64> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int64Regs, i64imm);
}

def imem : Operand<iPTR> {
  let PrintMethod = "printOperand";
}

def imemAny : Operand<iPTRAny> {
  let PrintMethod = "printOperand";
}

def LdStCode : Operand<i32> {
  let PrintMethod = "printLdStCode";
}

def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Materialize the address of a wrapped global into a register.
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
                     "mov.u32 \t$dst, $a;",
                     [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;

def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
                     "mov.u64 \t$dst, $a;",
                     [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;

// Get pointer to local stack
def MOV_DEPOT_ADDR
  : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
              "mov.u32 \t$d, __local_depot$num;", []>;
def MOV_DEPOT_ADDR_64
  : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
              "mov.u64 \t$d, __local_depot$num;", []>;


// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
let IsSimpleMove=1 in {
def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                   "mov.pred \t$dst, $sss;", []>;
def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                    "mov.u16 \t$dst, $sss;", []>;
def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
                    "mov.u32 \t$dst, $sss;", []>;
def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
                    "mov.u64 \t$dst, $sss;", []>;

def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                    "mov.f32 \t$dst, $src;", []>;
def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
                    "mov.f64 \t$dst, $src;", []>;
}
// Immediate materialization for each register class.
def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
                    "mov.pred \t$dst, $src;",
          [(set Int1Regs:$dst, imm:$src)]>;
def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
                    "mov.u16 \t$dst, $src;",
          [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
                    "mov.u32 \t$dst, $src;",
          [(set Int32Regs:$dst, imm:$src)]>;
def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
                    "mov.u64 \t$dst, $src;",
          [(set Int64Regs:$dst, imm:$src)]>;

def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
                    "mov.f32 \t$dst, $src;",
          [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
                    "mov.f64 \t$dst, $src;",
          [(set Float64Regs:$dst, fpimm:$src)]>;

def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;

//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
                        "add.u32 \t$dst, ${addr:add};",
                        [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
                        "add.u64 \t$dst, ${addr:add};",
                        [(set Int64Regs:$dst, ADDRri64:$addr)]>;

//-----------------------------------
// Comparison and Selection
//-----------------------------------

// Maps the integer comparison OpNode onto the supplied setp (i1 result) and
// set (i32 result) instructions for 16-, 32- and 64-bit operands, in rr/ri/ir
// forms.  Mode is passed through as the instruction's comparison operand.
multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
                       Instruction setp_16rr,
                       Instruction setp_16ri,
                       Instruction setp_16ir,
                       Instruction setp_32rr,
                       Instruction setp_32ri,
                       Instruction setp_32ir,
                       Instruction setp_64rr,
                       Instruction setp_64ri,
                       Instruction setp_64ir,
                       Instruction set_16rr,
                       Instruction set_16ri,
                       Instruction set_16ir,
                       Instruction set_32rr,
                       Instruction set_32ri,
                       Instruction set_32ir,
                       Instruction set_64rr,
                       Instruction set_64ri,
                       Instruction set_64ir> {
  // i16 -> pred
  def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
            (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
            (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
            (setp_16ir imm:$a, Int16Regs:$b, Mode)>;
  // i32 -> pred
  def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
            (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
            (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
            (setp_32ir imm:$a, Int32Regs:$b, Mode)>;
  // i64 -> pred
  def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
            (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
            (setp_64ir imm:$a, Int64Regs:$b, Mode)>;

  // i16 -> i32
  def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
            (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
            (set_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
            (set_16ir imm:$a, Int16Regs:$b, Mode)>;
  // i32 -> i32
  def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
            (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
            (set_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
            (set_32ir imm:$a, Int32Regs:$b, Mode)>;
  // i64 -> i32
  def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
            (set_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
            (set_64ir imm:$a, Int64Regs:$b, Mode)>;
}

// ISET_FORMAT bound to the signed (.s16/.s32/.s64) setp/set instructions.
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_s16rr, SETP_s16ri, SETP_s16ir,
                SETP_s32rr, SETP_s32ri, SETP_s32ir,
                SETP_s64rr, SETP_s64ri, SETP_s64ir,
                SET_s16rr, SET_s16ri, SET_s16ir,
                SET_s32rr, SET_s32ri, SET_s32ir,
                SET_s64rr, SET_s64ri, SET_s64ir> {
  // TableGen doesn't like empty multiclasses
  def : PatLeaf<(i32 0)>;
}

// ISET_FORMAT bound to the unsigned (.u16/.u32/.u64) setp/set instructions.
multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_u16rr, SETP_u16ri, SETP_u16ir,
                SETP_u32rr, SETP_u32ri, SETP_u32ir,
                SETP_u64rr, SETP_u64ri, SETP_u64ir,
                SET_u16rr, SET_u16ri, SET_u16ir,
                SET_u32rr, SET_u32ri, SET_u32ir,
                SET_u64rr, SET_u64ri, SET_u64ir> {
  // TableGen doesn't like empty multiclasses
  def : PatLeaf<(i32 0)>;
}

defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;

// i1 compares
// a != b on predicates is a XOR; a == b is its complement.
def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;

def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;

// i1 compare -> i32
// The XOR predicate is true when the operands differ, so setne selects
// (-1, 0) and seteq selects (0, -1).
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;



// Floating-point comparison patterns for OpNode.  Every f32 pattern comes as
// a pair: one using ModeFTZ gated on doF32FTZ, and one using Mode with no
// predicate.  f64 patterns always use Mode.
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  // f32 -> pred
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> pred
  def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
            (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
            (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;

  // f32 -> i32
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> i32
  def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
            (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
            (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}

defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;

defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;

defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;

defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;

//def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
//                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

// SelectionDAG type profiles (result count, operand count, constraints) for
// the parameter / call nodes.
def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
  SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
  SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
def SDTCallValProfile : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;

// SelectionDAG nodes used for parameter declaration, parameter load/store,
// call printing and return-value stores.
def DeclareParam :
  SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam :
  SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam :
  SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet :
  SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LoadParam :
  SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2 :
  SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4 :
  SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def PrintCall :
  SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni :
  SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParam :
  SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2 :
  SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4 :
  SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32 :
  SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32 :
  SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgBegin :
  SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg :
  SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg :
  SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd :
  SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid :
  SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype :
  SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal :
  SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam :
  SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
         []>;
def StoreRetval :
  SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
         [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2 :
  SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
         [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4 :
  SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
         [SDNPHasChain, SDNPSideEffect]>;
def PseudoUseParam :
  SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode :
  SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
         [SDNPHasChain, SDNPSideEffect]>;

// ld.param<opstr> of a scalar from [retval0+$b].  No isel pattern.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                !strconcat(!strconcat("ld.param", opstr),
                "\t$dst, [retval0+$b];"),
                []>;

// mov<opstr> from retval$b; selected for (LoadParam 0, imm).
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                !strconcat(!strconcat("mov", opstr),
                "\t$dst, retval$b;"),
                [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

// ld.param.v2<opstr> of two values from [retval0+$b].  No isel pattern.
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
                !strconcat(!strconcat("ld.param.v2", opstr),
                "\t{{$dst, $dst2}}, [retval0+$b];"), []>;

// ld.param.v4<opstr> of four values from [retval0+$b].  No isel pattern.
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
                      regclass:$dst4),
                (ins i32imm:$b),
                !strconcat(!strconcat("ld.param.v4", opstr),
                "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>;

// st.param<opstr> of a scalar to [param$a+$b].  No isel pattern.
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("st.param", opstr),
                "\t[param$a+$b], $val;"),
                []>;

// st.param.v2<opstr> of two values to [param$a+$b].  No isel pattern.
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
                             i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("st.param.v2", opstr),
                "\t[param$a+$b], {{$val, $val2}};"),
                []>;

// st.param.v4<opstr> of four values to [param$a+$b].  No isel pattern.
// The value operands are named $val, $val2, $val3, $val4 so that every
// operand referenced by the asm string is declared in the operand list
// (the list previously declared $val1..$val3, leaving $val4 undefined).
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
                             regclass:$val4, i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("st.param.v4", opstr),
                "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
                []>;

// st.param<opstr> of a scalar to [func_retval0+$a].  No isel pattern.
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
                !strconcat(!strconcat("st.param", opstr),
                "\t[func_retval0+$a], $val;"),
                []>;

// st.param.v2<opstr> of two values to [func_retval0+$a].  No isel pattern.
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
                !strconcat(!strconcat("st.param.v2", opstr),
                "\t[func_retval0+$a], {{$val, $val2}};"),
                []>;

// st.param.v4<opstr> of four values to [func_retval0+$a].  No isel pattern.
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$val, regclass:$val2, regclass:$val3,
                     regclass:$val4, i32imm:$a),
                !strconcat(!strconcat("st.param.v4", opstr),
                "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
                []>;

// Opening text of a call with N return values; selected for (PrintCall N).
def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
"call (retval0), ",
                                [(PrintCall (i32 1))]>;
def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
"call (retval0, retval1), ",
                                [(PrintCall (i32 2))]>;
def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2), ",
                                [(PrintCall (i32 3))]>;
def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3), ",
                                [(PrintCall (i32 4))]>;
def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4), ",
                                [(PrintCall (i32 5))]>;
def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
                                [(PrintCall (i32 6))]>;
def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
                                [(PrintCall (i32 7))]>;
def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
!strconcat("call (retval0, retval1, retval2, retval3, retval4",
           ", retval5, retval6, retval7), "),
                                [(PrintCall (i32 8))]>;

// Call with no return value; selected for (PrintCall 0).
def PrintCallNoRetInst :
  NVPTXInst<(outs), (ins), "call ", [(PrintCall (i32 0))]>;

// Opening text of a call.uni with N return values; selected for
// (PrintCallUni N).
def PrintCallUniRetInst1 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0), ",
            [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1), ",
            [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2), ",
            [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3), ",
            [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4), ",
            [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
            [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
            [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8 :
  NVPTXInst<(outs), (ins),
            !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                       "retval4, retval5, retval6, retval7), "),
            [(PrintCallUni (i32 8))]>;

// call.uni with no return value; selected for (PrintCallUni 0).
def PrintCallUniNoRetInst :
  NVPTXInst<(outs), (ins), "call.uni ", [(PrintCallUni (i32 0))]>;

// Loads from the return-value parameter space.  The 8-bit variants use the
// 16-bit register class.
def LoadParamMemI64   : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32   : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16   : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8    : LoadParamMemInst<Int16Regs, ".b8">;
def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
def LoadParamMemV2I8  : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8  : LoadParamV4MemInst<Int16Regs, ".b8">;
def LoadParamMemF32   : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64   : LoadParamMemInst<Float64Regs, ".f64">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;

// Stores to outgoing call parameters.
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;

def StoreParamI16   : StoreParamInst<Int16Regs, ".b16">;
def StoreParamI8    : StoreParamInst<Int16Regs, ".b8">;
def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
def StoreParamV2I8  : StoreParamV2Inst<Int16Regs, ".b8">;

// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32    : StoreParamV4Inst<Int32Regs, ".b32">;
// The four-element integer stores are therefore spelled out by hand.
def StoreParamV4I32 :
  NVPTXInst<(outs),
            (ins Int32Regs:$val, Int32Regs:$val2,
                 Int32Regs:$val3, Int32Regs:$val4,
                 i32imm:$a, i32imm:$b),
            "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
            []>;

def StoreParamV4I16 :
  NVPTXInst<(outs),
            (ins Int16Regs:$val, Int16Regs:$val2,
                 Int16Regs:$val3, Int16Regs:$val4,
                 i32imm:$a, i32imm:$b),
            "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
            []>;

def StoreParamV4I8 :
  NVPTXInst<(outs),
            (ins Int16Regs:$val, Int16Regs:$val2,
                 Int16Regs:$val3, Int16Regs:$val4,
                 i32imm:$a, i32imm:$b),
            "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
            []>;

def StoreParamF32   : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64   : StoreParamInst<Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4F32    : StoreParamV4Inst<Float32Regs, ".f32">;
def StoreParamV4F32 :
  NVPTXInst<(outs),
            (ins Float32Regs:$val, Float32Regs:$val2,
                 Float32Regs:$val3, Float32Regs:$val4,
                 i32imm:$a, i32imm:$b),
            "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
            []>;


// Stores to the function's return value (func_retval0).
def StoreRetvalI64   : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32   : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16   : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8    : StoreRetvalInst<Int16Regs, ".b8">;
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
def StoreRetvalV2I8  : StoreRetvalV2Inst<Int16Regs, ".b8">;
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
def StoreRetvalV4I8  : StoreRetvalV4Inst<Int16Regs, ".b8">;

def StoreRetvalF64   : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32   : StoreRetvalInst<Float32Regs, ".f32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;

// Punctuation emitted around a call's argument list, and the return.
def CallArgBeginInst :
  NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 :
  NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0 :
  NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst :
  NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;

// A register call argument followed by a comma; selected for (CallArg 0, reg).
class CallArgInst<NVPTXRegClass regclass> :
  NVPTXInst<(outs), (ins regclass:$a),
            "$a, ",
            [(CallArg (i32 0), regclass:$a)]>;

// The final register call argument (no trailing comma); selected for
// (LastCallArg 0, reg).
class LastCallArgInst<NVPTXRegClass regclass> :
  NVPTXInst<(outs), (ins regclass:$a),
            "$a",
            [(LastCallArg (i32 0), regclass:$a)]>;

def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;

def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;

// Immediate i32 call arguments.
def CallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a, ",
            [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a",
            [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Call arguments that name a declared parameter (param<N>); these match a
// first operand of 1 instead of 0.
def CallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a, ",
            [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a",
            [(LastCallArg (i32 1), (i32 imm:$a))]>;

// Call target: a global address, or a 32-/64-bit register.
def CallVoidInst :
  NVPTXInst<(outs), (ins imem:$addr),
            "$addr, ",
            [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg :
  NVPTXInst<(outs), (ins Int32Regs:$addr),
            "$addr, ",
            [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64 :
  NVPTXInst<(outs), (ins Int64Regs:$addr),
            "$addr, ",
            [(CallVoid Int64Regs:$addr)]>;
def PrototypeInst :
  NVPTXInst<(outs), (ins i32imm:$val),
            ", prototype_$val;",
            [(Prototype (i32 imm:$val))]>;

// Return-value declarations.  The scalar and register forms are told apart
// by the first DeclareRet operand (1 -> .param, 2 -> .reg).
def DeclareRetMemInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$size, i32imm:$num),
            ".param .align $align .b8 retval$num[$size];",
            [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                              (i32 imm:$num))]>;
def DeclareRetScalarInst :
  NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
            ".param .b$size retval$num;",
            [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst :
  NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
            ".reg .b$size retval$num;",
            [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;

// Outgoing parameter declarations.  The scalar and register forms are told
// apart by the last DeclareScalarParam operand (0 -> .param, 1 -> .reg).
def DeclareParamInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$a, i32imm:$size),
            ".param .align $align .b8 param$a[$size];",
            [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                           (i32 imm:$size))]>;
def DeclareScalarParamInst :
  NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
            ".param .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst :
  NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
            ".reg .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;

// mov<asmstr> $dst, $src; selected for (MoveParam src).
class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
  NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
            !strconcat("mov", !strconcat(asmstr, "\t$dst, $src;")),
            [(set regclass:$dst, (MoveParam regclass:$src))]>;

def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16-bit form is emitted as a cvt rather than a mov.
def MoveParamI16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "cvt.u16.u32\t$dst, $src;",
            [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;

// Emits only a comment naming $src; selected for (PseudoUseParam src).
class PseudoUseParamInst<NVPTXRegClass regclass> :
  NVPTXInst<(outs), (ins regclass:$src),
            "// Pseudo use of $src",
            [(PseudoUseParam regclass:$src)]>;

def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;


//
// Load / Store Handling
2065 // 2066 multiclass LD<NVPTXRegClass regclass> { 2067 def _avar : NVPTXInst<(outs regclass:$dst), 2068 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2069 i32imm:$fromWidth, imem:$addr), 2070 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2071 "$fromWidth \t$dst, [$addr];"), []>; 2072 def _areg : NVPTXInst<(outs regclass:$dst), 2073 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2074 i32imm:$fromWidth, Int32Regs:$addr), 2075 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2076 "$fromWidth \t$dst, [$addr];"), []>; 2077 def _areg_64 : NVPTXInst<(outs regclass:$dst), 2078 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2079 i32imm:$fromWidth, Int64Regs:$addr), 2080 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", 2081 " \t$dst, [$addr];"), []>; 2082 def _ari : NVPTXInst<(outs regclass:$dst), 2083 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2084 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), 2085 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2086 "$fromWidth \t$dst, [$addr+$offset];"), []>; 2087 def _ari_64 : NVPTXInst<(outs regclass:$dst), 2088 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2089 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), 2090 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", 2091 " \t$dst, [$addr+$offset];"), []>; 2092 def _asi : NVPTXInst<(outs regclass:$dst), 2093 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2094 i32imm:$fromWidth, imem:$addr, i32imm:$offset), 2095 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2096 "$fromWidth \t$dst, [$addr+$offset];"), []>; 2097 } 2098 2099 let mayLoad=1, hasSideEffects=0 in { 2100 defm LD_i8 : LD<Int16Regs>; 2101 defm LD_i16 : LD<Int16Regs>; 2102 defm LD_i32 : LD<Int32Regs>; 2103 defm LD_i64 : 
LD<Int64Regs>; 2104 defm LD_f32 : LD<Float32Regs>; 2105 defm LD_f64 : LD<Float64Regs>; 2106 } 2107 2108 multiclass ST<NVPTXRegClass regclass> { 2109 def _avar : NVPTXInst<(outs), 2110 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2111 LdStCode:$Sign, i32imm:$toWidth, imem:$addr), 2112 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2113 " \t[$addr], $src;"), []>; 2114 def _areg : NVPTXInst<(outs), 2115 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2116 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), 2117 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2118 " \t[$addr], $src;"), []>; 2119 def _areg_64 : NVPTXInst<(outs), 2120 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2121 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), 2122 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", 2123 "\t[$addr], $src;"), []>; 2124 def _ari : NVPTXInst<(outs), 2125 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2126 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), 2127 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2128 " \t[$addr+$offset], $src;"), []>; 2129 def _ari_64 : NVPTXInst<(outs), 2130 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2131 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), 2132 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", 2133 "\t[$addr+$offset], $src;"), []>; 2134 def _asi : NVPTXInst<(outs), 2135 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2136 LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), 2137 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2138 " \t[$addr+$offset], $src;"), []>; 2139 } 2140 2141 let mayStore=1, hasSideEffects=0 in { 2142 defm ST_i8 : ST<Int16Regs>; 2143 defm ST_i16 
: ST<Int16Regs>; 2144 defm ST_i32 : ST<Int32Regs>; 2145 defm ST_i64 : ST<Int64Regs>; 2146 defm ST_f32 : ST<Float32Regs>; 2147 defm ST_f64 : ST<Float64Regs>; 2148 } 2149 2150 // The following is used only in and after vector elementizations. 2151 // Vector elementization happens at the machine instruction level, so the 2152 // following instruction 2153 // never appears in the DAG. 2154 multiclass LD_VEC<NVPTXRegClass regclass> { 2155 def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2156 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2157 i32imm:$fromWidth, imem:$addr), 2158 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2159 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; 2160 def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2161 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2162 i32imm:$fromWidth, Int32Regs:$addr), 2163 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2164 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; 2165 def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2166 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2167 i32imm:$fromWidth, Int64Regs:$addr), 2168 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2169 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; 2170 def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2171 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2172 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), 2173 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2174 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; 2175 def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2176 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2177 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), 2178 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 
2179 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; 2180 def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2181 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2182 i32imm:$fromWidth, imem:$addr, i32imm:$offset), 2183 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2184 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; 2185 def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, 2186 regclass:$dst3, regclass:$dst4), 2187 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2188 i32imm:$fromWidth, imem:$addr), 2189 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2190 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; 2191 def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 2192 regclass:$dst4), 2193 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2194 i32imm:$fromWidth, Int32Regs:$addr), 2195 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2196 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; 2197 def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, 2198 regclass:$dst3, regclass:$dst4), 2199 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2200 i32imm:$fromWidth, Int64Regs:$addr), 2201 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2202 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; 2203 def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 2204 regclass:$dst4), 2205 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2206 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), 2207 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2208 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), 2209 []>; 2210 def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, 2211 regclass:$dst3, regclass:$dst4), 
2212 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2213 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), 2214 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2215 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), 2216 []>; 2217 def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 2218 regclass:$dst4), 2219 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2220 i32imm:$fromWidth, imem:$addr, i32imm:$offset), 2221 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2222 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), 2223 []>; 2224 } 2225 let mayLoad=1, hasSideEffects=0 in { 2226 defm LDV_i8 : LD_VEC<Int16Regs>; 2227 defm LDV_i16 : LD_VEC<Int16Regs>; 2228 defm LDV_i32 : LD_VEC<Int32Regs>; 2229 defm LDV_i64 : LD_VEC<Int64Regs>; 2230 defm LDV_f32 : LD_VEC<Float32Regs>; 2231 defm LDV_f64 : LD_VEC<Float64Regs>; 2232 } 2233 2234 multiclass ST_VEC<NVPTXRegClass regclass> { 2235 def _v2_avar : NVPTXInst<(outs), 2236 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2237 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), 2238 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2239 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; 2240 def _v2_areg : NVPTXInst<(outs), 2241 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2242 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), 2243 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2244 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; 2245 def _v2_areg_64 : NVPTXInst<(outs), 2246 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2247 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), 2248 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2249 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; 2250 def _v2_ari : 
NVPTXInst<(outs), 2251 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2252 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, 2253 i32imm:$offset), 2254 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2255 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; 2256 def _v2_ari_64 : NVPTXInst<(outs), 2257 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2258 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, 2259 i32imm:$offset), 2260 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2261 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; 2262 def _v2_asi : NVPTXInst<(outs), 2263 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2264 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, 2265 i32imm:$offset), 2266 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2267 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; 2268 def _v4_avar : NVPTXInst<(outs), 2269 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2270 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2271 i32imm:$fromWidth, imem:$addr), 2272 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2273 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; 2274 def _v4_areg : NVPTXInst<(outs), 2275 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2276 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2277 i32imm:$fromWidth, Int32Regs:$addr), 2278 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2279 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; 2280 def _v4_areg_64 : NVPTXInst<(outs), 2281 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2282 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2283 i32imm:$fromWidth, Int64Regs:$addr), 2284 
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
    "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_ari : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
    "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_ari_64 : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
    "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_asi : NVPTXInst<(outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
    "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
}

// Vector store instructions, one ST_VEC instantiation per element type.
// Note that STV_i8 and STV_i16 both use Int16Regs.
let mayStore=1, hasSideEffects=0 in {
defm STV_i8  : ST_VEC<Int16Regs>;
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
defm STV_i64 : ST_VEC<Int64Regs>;
defm STV_f32 : ST_VEC<Float32Regs>;
defm STV_f64 : ST_VEC<Float64Regs>;
}


//---- Conversion ----

// Same-width bitcast between register classes, printed as "mov.b<SzStr>".
//   SzStr       - bit width as text, appended to "mov.b"
//   regclassIn  - register class of the source operand $a
//   regclassOut - register class of the result $d
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
  NVPTXRegClass regclassOut> :
           NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
           !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
     [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;

def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;

// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns.  Therefore, we
// use an integer selp to materialize the integer value of the predicate and
// then cvt that to floating-point.

// sint -> f32
// A signed i1 holds 0 or -1, so a true predicate must convert to -1.0.  Select
// -1/0 (as the "sext i1" patterns do) rather than 1/0, which would give +1.0.
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
          (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
          (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int64Regs:$a)),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;

// uint -> f32
def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (uint_to_fp Int16Regs:$a)),
          (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int32Regs:$a)),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;

// sint -> f64
// As for f32: a true signed i1 is -1, so select -1/0 before converting.
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
          (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
          (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;

// uint -> f64
def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (uint_to_fp Int16Regs:$a)),
          (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int32Regs:$a)),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;


// f32 -> sint
// For an i1 result, convert to a 32-bit integer with CvtRZI first and then
// test bit 0, the same way the integer "trunc ... to i1" patterns in this file
// do.  The previous pattern compared the raw f32 bits against 0 with CmpEQ,
// which set the predicate exactly when the input was +0.0.
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
// For the wider results the FTZ variant is listed first and guarded by
// doF32FTZ; the unguarded pattern is the fallback.
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;

// f32 -> uint
// i1 result: convert to an integer, then test bit 0 (see f32 -> sint above).
def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;

// f64 -> sint
// i1 result: convert to a 32-bit integer, then test bit 0.  The previous
// pattern compared the raw f64 bits against 0, which is true only for +0.0.
def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
          (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;

// f64 -> uint
// For an i1 result, convert to a 32-bit integer with CvtRZI first and then
// test bit 0, the same way the integer "trunc ... to i1" patterns in this file
// do.  The previous pattern compared the raw f64 bits against 0 with CmpEQ,
// which set the predicate exactly when the input was +0.0.
def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
          (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;

// sext i1: a true predicate becomes all-ones (-1), false becomes 0.
def : Pat<(i16 (sext Int1Regs:$a)),
          (SELP_s16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (sext Int1Regs:$a)),
          (SELP_s32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (sext Int1Regs:$a)),
          (SELP_s64ii -1, 0, Int1Regs:$a)>;

// zext i1: a true predicate becomes 1, false becomes 0.
def : Pat<(i16 (zext Int1Regs:$a)),
          (SELP_u16ii 1, 0, Int1Regs:$a)>;
def : Pat<(i32 (zext Int1Regs:$a)),
          (SELP_u32ii 1, 0, Int1Regs:$a)>;
def : Pat<(i64 (zext Int1Regs:$a)),
          (SELP_u64ii 1, 0, Int1Regs:$a)>;

// anyext i1: selects -1/0, i.e. the same values as sext.
def : Pat<(i16 (anyext Int1Regs:$a)),
          (SELP_u16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (anyext Int1Regs:$a)),
          (SELP_u32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (anyext Int1Regs:$a)),
          (SELP_u64ii -1, 0, Int1Regs:$a)>;

// sext i16
def : Pat<(i32 (sext Int16Regs:$a)),
          (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (sext Int16Regs:$a)),
          (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;

// zext i16
def : Pat<(i32 (zext Int16Regs:$a)),
          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext Int16Regs:$a)),
          (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// anyext i16: selected exactly like zext i16.
def : Pat<(i32 (anyext Int16Regs:$a)),
          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int16Regs:$a)),
          (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// sext i32
def : Pat<(i64 (sext Int32Regs:$a)),
          (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;
// zext i32
def : Pat<(i64 (zext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;

// anyext i32 (selected exactly like zext i32)
def : Pat<(i64 (anyext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;


// truncate i64
// Truncation to i1 keeps only bit 0: mask with 1 and compare against 1.
def : Pat<(i32 (trunc Int64Regs:$a)), (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int64Regs:$a)), (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int64Regs:$a)),
          (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;

// truncate i32
def : Pat<(i16 (trunc Int32Regs:$a)), (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int32Regs:$a)),
          (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;

// truncate i16
def : Pat<(i1 (trunc Int16Regs:$a)),
          (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;

// sext_inreg
def : Pat<(sext_inreg Int16Regs:$a, i8),
          (CVT_INREG_s16_s8 Int16Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i8),
          (CVT_INREG_s32_s8 Int32Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i16),
          (CVT_INREG_s32_s16 Int32Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i8),
          (CVT_INREG_s64_s8 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i16),
          (CVT_INREG_s64_s16 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i32),
          (CVT_INREG_s64_s32 Int64Regs:$a)>;


// Select instructions with 32-bit predicates
// The 32-bit condition is first reduced to a predicate by testing its low bit
// (and with 1, compare equal to 1); that predicate then drives a selp.
def : Pat<
  (select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
  (SELP_b16rr Int16Regs:$a, Int16Regs:$b,
              (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<
  (select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
  (SELP_b32rr Int32Regs:$a, Int32Regs:$b,
              (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<
  (select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
  (SELP_b64rr Int64Regs:$a, Int64Regs:$b,
              (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<
  (select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
  (SELP_f32rr Float32Regs:$a, Float32Regs:$b,
              (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<
  (select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
  (SELP_f64rr Float64Regs:$a, Float64Regs:$b,
              (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;


// pack a set of smaller registers into one larger register
// (no isel patterns are attached to these instructions)
def V4I16toI64 : NVPTXInst<
  (outs Int64Regs:$d),
  (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4),
  "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
  []>;
def V2I16toI32 : NVPTXInst<
  (outs Int32Regs:$d),
  (ins Int16Regs:$s1, Int16Regs:$s2),
  "mov.b32\t$d, {{$s1, $s2}};",
  []>;
def V2I32toI64 : NVPTXInst<
  (outs Int64Regs:$d),
  (ins Int32Regs:$s1, Int32Regs:$s2),
  "mov.b64\t$d, {{$s1, $s2}};",
  []>;
def V2F32toF64 : NVPTXInst<
  (outs Float64Regs:$d),
  (ins Float32Regs:$s1, Float32Regs:$s2),
  "mov.b64\t$d, {{$s1, $s2}};",
  []>;

// unpack one larger register into a set of smaller registers
// (no isel patterns are attached to these instructions)
def I64toV4I16 : NVPTXInst<
  (outs Int16Regs:$d1, Int16Regs:$d2, Int16Regs:$d3, Int16Regs:$d4),
  (ins Int64Regs:$s),
  "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
  []>;
def I32toV2I16 : NVPTXInst<
  (outs Int16Regs:$d1, Int16Regs:$d2),
  (ins Int32Regs:$s),
  "mov.b32\t{{$d1, $d2}}, $s;",
  []>;
def I64toV2I32 : NVPTXInst<
  (outs Int32Regs:$d1, Int32Regs:$d2),
  (ins Int64Regs:$s),
  "mov.b64\t{{$d1, $d2}}, $s;",
  []>;
def F64toV2F32 : NVPTXInst<
  (outs Float32Regs:$d1, Float32Regs:$d2),
  (ins Float64Regs:$s),
  "mov.b64\t{{$d1, $d2}}, $s;",
  []>;

// Count leading zeros
// Both forms write an Int32Regs result, including the 64-bit input form.
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                       "clz.b32\t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                       "clz.b64\t$d, $a;", []>;

// 32-bit has a direct PTX instruction
def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
def : Pat<(ctlz_zero_undef Int32Regs:$a), (CLZr32 Int32Regs:$a)>;

// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
// to 64-bit to match the LLVM semantics
def : Pat<(ctlz Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(ctlz_zero_undef Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;

// For 16-bit, we zero-extend to 32-bit, then trunc the result back
// to 16-bits (ctlz of a 16-bit value is guaranteed to require less
// than 16 bits to store).  We also need to subtract 16 because the
// high-order 16 zeros were counted.
def : Pat<
  (ctlz Int16Regs:$a),
  (SUBi16ri
    (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE),
    16)>;
def : Pat<
  (ctlz_zero_undef Int16Regs:$a),
  (SUBi16ri
    (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE),
    16)>;

// Population count
// Both forms write an Int32Regs result, including the 64-bit input form.
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                        "popc.b32\t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                        "popc.b64\t$d, $a;", []>;

// 32-bit has a direct PTX instruction
def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;

// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
// to 64-bit to match the LLVM semantics
def : Pat<(ctpop Int64Regs:$a),
          (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;

// For 16-bit, we zero-extend to 32-bit, then trunc the result back
// to 16-bits (ctpop of a 16-bit value is guaranteed to require less
// than 16 bits to store)
def : Pat<
  (ctpop Int16Regs:$a),
  (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;

// fround f64 -> f32
// The FTZ variant is listed first and guarded by doF32FTZ; the unguarded
// pattern is the fallback.
def : Pat<(f32 (fround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;

// fextend f32 -> f64
def : Pat<(f64 (fextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f64 (fextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;

def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
                     [SDNPHasChain, SDNPOptInGlue]>;

//-----------------------------------
// Control-flow
//-----------------------------------

let isTerminator = 1 in {
  let isReturn = 1, isBarrier = 1 in
  def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;

  let isBranch = 1 in {
    // Branch taken when predicate $a is true.
    def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                            "@$a bra \t$target;",
                            [(brcond Int1Regs:$a, bb:$target)]>;
    // Branch taken when predicate $a is false ("@!"); it has no pattern of
    // its own and is only selected through an explicit Pat.
    def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                                 "@!$a bra \t$target;",
                                 []>;
  }

  let isBranch = 1, isBarrier = 1 in
  def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
                       "bra.uni \t$target;",
                       [(br bb:$target)]>;
}

// A branch on a 32-bit condition first compares the value against zero.
def : Pat<(brcond Int32Regs:$a, bb:$target),
          (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;

// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
// conditional branch if the target block is the next block so that the code
// can fall through to the target block.
// The inversion is done by 'xor condition, 1', which will be translated to
// (setne condition, -1).
// Since ptx supports '@!pred bra target', we should use it.
// Inverted conditional branch: (setne $a, -1) on a predicate is selected as
// the "@!$a bra" form.
def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
          (CBranchOther Int1Regs:$a, bb:$target)>;

// Call
// Type profiles: both call-sequence nodes take i32 operands.
def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
                                          SDTCisVT<1, i32>]>;

def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
                           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPSideEffect]>;

// NVPTXISD::CALL has no results and a single i32 operand (the callee).
def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def calltarget : Operand<i32>;

let isCall = 1 in
def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
                     "call \t$dst, (1);", []>;

// Both global addresses and external symbols are accepted as call targets.
def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;

// Pseudo instructions.
// Forwards all four arguments unchanged to NVPTXInst.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
  : NVPTXInst<outs, ins, asmstr, pattern>;

// @TODO: We use some tricks here to emit curly braces.  Can we clean this up
// a bit without TableGen modifications?
// Call-sequence markers.  The asm strings print the marker comments and the
// braces around the call sequence; the start marker also declares
// temp_param_reg.
def Callseq_Start : NVPTXInst<
  (outs), (ins i32imm:$amt),
  "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
  [(callseq_start timm:$amt)]>;
def Callseq_End : NVPTXInst<
  (outs), (ins i32imm:$amt1, i32imm:$amt2),
  "\n\t//{{\n\t}}// Callseq End $amt1",
  [(callseq_end timm:$amt1, timm:$amt2)]>;

// trap instruction
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>;

// Call prototype wrapper
// The node takes one integer operand; the instruction prints it through
// printProtoIdent.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
                           [SDNPHasChain, SDNPOutGlue, SDNPInGlue,
                            SDNPSideEffect]>;
def ProtoIdent : Operand<i32> {
  let PrintMethod = "printProtoIdent";
}
def CALL_PROTOTYPE : NVPTXInst<
  (outs), (ins ProtoIdent:$ident),
  "$ident",
  [(CallPrototype (i32 texternalsym:$ident))]>;



include "NVPTXIntrinsics.td"


//-----------------------------------
// Notes
//-----------------------------------
// BSWAP is currently expanded.  The following would be more efficient:
// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit
//   registers
// - for sm_20, use prmt (written "pmpt" in the original note), with vector
//   scalar mov to do the pack and unpack.  sm_20 supports native 32-bit
//   registers, but not native 16-bit registers.