1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file describes the PTX instructions in TableGen format. 11 // 12 //===----------------------------------------------------------------------===// 13 14 include "NVPTXInstrFormats.td" 15 16 // A NOP instruction 17 def NOP : NVPTXInst<(outs), (ins), "", []>; 18 19 // List of vector specific properties 20 def isVecLD : VecInstTypeEnum<1>; 21 def isVecST : VecInstTypeEnum<2>; 22 def isVecBuild : VecInstTypeEnum<3>; 23 def isVecShuffle : VecInstTypeEnum<4>; 24 def isVecExtract : VecInstTypeEnum<5>; 25 def isVecInsert : VecInstTypeEnum<6>; 26 def isVecDest : VecInstTypeEnum<7>; 27 def isVecOther : VecInstTypeEnum<15>; 28 29 //===----------------------------------------------------------------------===// 30 // NVPTX Operand Definitions. 31 //===----------------------------------------------------------------------===// 32 33 def brtarget : Operand<OtherVT>; 34 35 //===----------------------------------------------------------------------===// 36 // NVPTX Instruction Predicate Definitions 37 //===----------------------------------------------------------------------===// 38 39 40 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">; 41 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">; 42 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">; 43 def useAtomRedG32forGen32 : 44 Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">; 45 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">; 46 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">; 47 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">; 48 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">; 49 def useAtomRedG64forGen64 : 50 Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">; 51 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">; 52 def hasVote : Predicate<"Subtarget.hasVote()">; 53 def hasDouble : Predicate<"Subtarget.hasDouble()">; 54 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">; 55 def hasLDG : Predicate<"Subtarget.hasLDG()">; 56 def hasLDU : Predicate<"Subtarget.hasLDU()">; 57 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; 58 59 def doF32FTZ : Predicate<"UseF32FTZ">; 60 61 def doFMAF32 : Predicate<"doFMAF32">; 62 def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">; 63 def doFMAF32AGG : Predicate<"doFMAF32AGG">; 64 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">; 65 def doFMAF64 : Predicate<"doFMAF64">; 66 def doFMAF64AGG : Predicate<"doFMAF64AGG">; 67 def doFMADF32 : Predicate<"doFMADF32">; 68 def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">; 69 70 def doMulWide : Predicate<"doMulWide">; 71 72 def allowFMA : Predicate<"allowFMA">; 73 def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">; 74 75 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">; 76 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">; 77 78 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; 79 80 def true : Predicate<"1">; 81 82 //===----------------------------------------------------------------------===// 83 // Special Handling for 8-bit Operands and Operations 84 // 85 // PTX supports 8-bit signed and unsigned types, but does not support 8-bit 86 // operations (like add, shift, etc) except for ld/st/cvt. SASS does not have 87 // 8-bit registers. 88 // 89 // PTX ld, st and cvt instructions permit source and destination data operands 90 // to be wider than the instruction-type size, so that narrow values may be 91 // loaded, stored, and converted using regular-width registers. 92 // 93 // So in PTX generation, we 94 // - always use 16-bit registers in place in 8-bit registers. 95 // (8-bit variables should stay as 8-bit as they represent memory layout.) 96 // - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values 97 // before operation 98 // . div 99 // . rem 100 // . neg (sign) 101 // . set, setp 102 // . shr 103 // 104 // We are patching the operations by inserting the cvt instructions in the 105 // asm strings of the affected instructions. 106 // 107 // Since vector operations, except for ld/st, are eventually elementized. We 108 // do not need to special-hand the vector 8-bit operations. 109 // 110 // 111 //===----------------------------------------------------------------------===// 112 113 // Generate string block like 114 // { 115 // .reg .s16 %temp1; 116 // .reg .s16 %temp2; 117 // cvt.s16.s8 %temp1, %a; 118 // cvt.s16.s8 %temp2, %b; 119 // opc.s16 %dst, %temp1, %temp2; 120 // } 121 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 122 class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> { 123 string s = !strconcat("{{\n\t", 124 !strconcat(".reg .", !strconcat(TypeStr, 125 !strconcat(" \t%temp1;\n\t", 126 !strconcat(".reg .", !strconcat(TypeStr, 127 !strconcat(" \t%temp2;\n\t", 128 !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t", 129 !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t", 130 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))); 131 } 132 133 // Generate string block like 134 // { 135 // .reg .s16 %temp1; 136 // .reg .s16 %temp2; 137 // cvt.s16.s8 %temp1, %a; 138 // mov.b16 %temp2, %b; 139 // cvt.s16.s8 %temp2, %temp2; 140 // opc.s16 %dst, %temp1, %temp2; 141 // } 142 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 143 class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> { 144 string s = !strconcat("{{\n\t", 145 !strconcat(".reg .", !strconcat(TypeStr, 146 !strconcat(" \t%temp1;\n\t", 147 !strconcat(".reg .", 148 !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t", 149 !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t", 150 !strconcat("mov.b16 \t%temp2, $b;\n\t", 151 !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t", 152 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))))); 153 } 154 155 // Generate string block like 156 // { 157 // .reg .s16 %temp1; 158 // .reg .s16 %temp2; 159 // mov.b16 %temp1, %b; 160 // cvt.s16.s8 %temp1, %temp1; 161 // cvt.s16.s8 %temp2, %a; 162 // opc.s16 %dst, %temp1, %temp2; 163 // } 164 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 165 class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> { 166 string s = !strconcat("{{\n\t", 167 !strconcat(".reg .", !strconcat(TypeStr, 168 !strconcat(" \t%temp1;\n\t", 169 !strconcat(".reg .", !strconcat(TypeStr, 170 !strconcat(" \t%temp2;\n\t", 171 !strconcat("mov.b16 \t%temp1, $a;\n\t", 172 !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t", 173 !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t", 174 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))))); 175 } 176 177 178 //===----------------------------------------------------------------------===// 179 // Some Common Instruction Class Templates 180 //===----------------------------------------------------------------------===// 181 182 multiclass I3<string OpcStr, SDNode OpNode> { 183 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), 184 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 185 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 186 Int64Regs:$b))]>; 187 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), 188 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 189 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; 190 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), 191 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 192 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 193 Int32Regs:$b))]>; 194 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 195 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 196 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 197 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), 198 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 199 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 200 Int16Regs:$b))]>; 201 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), 202 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 203 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; 204 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 205 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 206 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; 207 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), 208 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 209 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>; 210 } 211 212 multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> { 213 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), 214 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 215 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 216 Int64Regs:$b))]>; 217 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), 218 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 219 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; 220 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), 221 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 222 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 223 Int32Regs:$b))]>; 224 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 225 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 226 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 227 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), 228 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 229 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 230 Int16Regs:$b))]>; 231 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), 232 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 233 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; 234 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 235 Handle_i8rr<OpcStr, TypeStr, CVTStr>.s, 236 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; 237 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), 238 Handle_i8ri<OpcStr, TypeStr, CVTStr>.s, 239 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>; 240 } 241 242 multiclass I3_noi8<string OpcStr, SDNode OpNode> { 243 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), 244 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 245 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 246 Int64Regs:$b))]>; 247 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), 248 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 249 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; 250 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), 251 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 252 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 253 Int32Regs:$b))]>; 254 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 255 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 256 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 257 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), 258 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 259 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 260 Int16Regs:$b))]>; 261 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), 262 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 263 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; 264 } 265 266 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> { 267 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, 268 Int32Regs:$b), 269 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), 270 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 271 Int32Regs:$b))]>; 272 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 273 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), 274 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 275 } 276 277 multiclass F3<string OpcStr, SDNode OpNode> { 278 def f64rr : NVPTXInst<(outs Float64Regs:$dst), 279 (ins Float64Regs:$a, Float64Regs:$b), 280 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), 281 [(set Float64Regs:$dst, 282 (OpNode Float64Regs:$a, Float64Regs:$b))]>, 283 Requires<[allowFMA]>; 284 def f64ri : NVPTXInst<(outs Float64Regs:$dst), 285 (ins Float64Regs:$a, f64imm:$b), 286 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), 287 [(set Float64Regs:$dst, 288 (OpNode Float64Regs:$a, fpimm:$b))]>, 289 Requires<[allowFMA]>; 290 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), 291 (ins Float32Regs:$a, Float32Regs:$b), 292 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 293 [(set Float32Regs:$dst, 294 (OpNode Float32Regs:$a, Float32Regs:$b))]>, 295 Requires<[allowFMA_ftz]>; 296 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), 297 (ins Float32Regs:$a, f32imm:$b), 298 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 299 [(set Float32Regs:$dst, 300 (OpNode Float32Regs:$a, fpimm:$b))]>, 301 Requires<[allowFMA_ftz]>; 302 def f32rr : NVPTXInst<(outs Float32Regs:$dst), 303 (ins Float32Regs:$a, Float32Regs:$b), 304 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), 305 [(set Float32Regs:$dst, 306 (OpNode Float32Regs:$a, Float32Regs:$b))]>, 307 Requires<[allowFMA]>; 308 def f32ri : NVPTXInst<(outs Float32Regs:$dst), 309 (ins Float32Regs:$a, f32imm:$b), 310 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), 311 [(set Float32Regs:$dst, 312 (OpNode Float32Regs:$a, fpimm:$b))]>, 313 Requires<[allowFMA]>; 314 } 315 316 multiclass F3_rn<string OpcStr, SDNode OpNode> { 317 def f64rr : NVPTXInst<(outs Float64Regs:$dst), 318 (ins Float64Regs:$a, Float64Regs:$b), 319 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), 320 [(set Float64Regs:$dst, 321 (OpNode Float64Regs:$a, Float64Regs:$b))]>; 322 def f64ri : NVPTXInst<(outs Float64Regs:$dst), 323 (ins Float64Regs:$a, f64imm:$b), 324 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), 325 [(set Float64Regs:$dst, 326 (OpNode Float64Regs:$a, fpimm:$b))]>; 327 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), 328 (ins Float32Regs:$a, Float32Regs:$b), 329 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), 330 [(set Float32Regs:$dst, 331 (OpNode Float32Regs:$a, Float32Regs:$b))]>, 332 Requires<[doF32FTZ]>; 333 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), 334 (ins Float32Regs:$a, f32imm:$b), 335 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), 336 [(set Float32Regs:$dst, 337 (OpNode Float32Regs:$a, fpimm:$b))]>, 338 Requires<[doF32FTZ]>; 339 def f32rr : NVPTXInst<(outs Float32Regs:$dst), 340 (ins Float32Regs:$a, Float32Regs:$b), 341 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), 342 [(set Float32Regs:$dst, 343 (OpNode Float32Regs:$a, Float32Regs:$b))]>; 344 def f32ri : NVPTXInst<(outs Float32Regs:$dst), 345 (ins Float32Regs:$a, f32imm:$b), 346 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), 347 [(set Float32Regs:$dst, 348 (OpNode Float32Regs:$a, fpimm:$b))]>; 349 } 350 351 multiclass F2<string OpcStr, SDNode OpNode> { 352 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), 353 !strconcat(OpcStr, ".f64 \t$dst, $a;"), 354 [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; 355 def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), 356 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), 357 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, 358 Requires<[doF32FTZ]>; 359 def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), 360 !strconcat(OpcStr, ".f32 \t$dst, $a;"), 361 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; 362 } 363 364 //===----------------------------------------------------------------------===// 365 // NVPTX Instructions. 366 //===----------------------------------------------------------------------===// 367 368 //----------------------------------- 369 // Integer Arithmetic 370 //----------------------------------- 371 372 multiclass ADD_SUB_i1<SDNode OpNode> { 373 def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), 374 "xor.pred \t$dst, $a, $b;", 375 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; 376 def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), 377 "xor.pred \t$dst, $a, $b;", 378 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; 379 } 380 381 defm ADD_i1 : ADD_SUB_i1<add>; 382 defm SUB_i1 : ADD_SUB_i1<sub>; 383 384 385 defm ADD : I3<"add.s", add>; 386 defm SUB : I3<"sub.s", sub>; 387 388 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; 389 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; 390 391 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; 392 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; 393 394 //mul.wide PTX instruction 395 def SInt32Const : PatLeaf<(imm), [{ 396 const APInt &v = N->getAPIntValue(); 397 if (v.isSignedIntN(32)) 398 return true; 399 return false; 400 }]>; 401 402 def UInt32Const : PatLeaf<(imm), [{ 403 const APInt &v = N->getAPIntValue(); 404 if (v.isIntN(32)) 405 return true; 406 return false; 407 }]>; 408 409 def SInt16Const : PatLeaf<(imm), [{ 410 const APInt &v = N->getAPIntValue(); 411 if (v.isSignedIntN(16)) 412 return true; 413 return false; 414 }]>; 415 416 def UInt16Const : PatLeaf<(imm), [{ 417 const APInt &v = N->getAPIntValue(); 418 if (v.isIntN(16)) 419 return true; 420 return false; 421 }]>; 422 423 def Int5Const : PatLeaf<(imm), [{ 424 const APInt &v = N->getAPIntValue(); 425 // Check if 0 <= v < 32 426 // Only then the result from (x << v) will be i32 427 if (v.sge(0) && v.slt(32)) 428 return true; 429 return false; 430 }]>; 431 432 def Int4Const : PatLeaf<(imm), [{ 433 const APInt &v = N->getAPIntValue(); 434 // Check if 0 <= v < 16 435 // Only then the result from (x << v) will be i16 436 if (v.sge(0) && v.slt(16)) 437 return true; 438 return false; 439 }]>; 440 441 def SHL2MUL32 : SDNodeXForm<imm, [{ 442 const APInt &v = N->getAPIntValue(); 443 APInt temp(32, 1); 444 return CurDAG->getTargetConstant(temp.shl(v), MVT::i32); 445 }]>; 446 447 def SHL2MUL16 : SDNodeXForm<imm, [{ 448 const APInt &v = N->getAPIntValue(); 449 APInt temp(16, 1); 450 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); 451 }]>; 452 453 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst), 454 (ins Int32Regs:$a, Int32Regs:$b), 455 "mul.wide.s32 \t$dst, $a, $b;", []>; 456 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst), 457 (ins Int32Regs:$a, i64imm:$b), 458 "mul.wide.s32 \t$dst, $a, $b;", []>; 459 460 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst), 461 (ins Int32Regs:$a, Int32Regs:$b), 462 "mul.wide.u32 \t$dst, $a, $b;", []>; 463 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst), 464 (ins Int32Regs:$a, i64imm:$b), 465 "mul.wide.u32 \t$dst, $a, $b;", []>; 466 467 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst), 468 (ins Int16Regs:$a, Int16Regs:$b), 469 "mul.wide.s16 \t$dst, $a, $b;", []>; 470 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst), 471 (ins Int16Regs:$a, i32imm:$b), 472 "mul.wide.s16 \t$dst, $a, $b;", []>; 473 474 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst), 475 (ins Int16Regs:$a, Int16Regs:$b), 476 "mul.wide.u16 \t$dst, $a, $b;", []>; 477 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst), 478 (ins Int16Regs:$a, i32imm:$b), 479 "mul.wide.u16 \t$dst, $a, $b;", []>; 480 481 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), 482 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, 483 Requires<[doMulWide]>; 484 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), 485 (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, 486 Requires<[doMulWide]>; 487 488 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)), 489 (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, 490 Requires<[doMulWide]>; 491 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)), 492 (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, 493 Requires<[doMulWide]>; 494 495 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), 496 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, 497 Requires<[doMulWide]>; 498 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), 499 (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>, 500 Requires<[doMulWide]>; 501 502 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), 503 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; 504 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), 505 (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>, 506 Requires<[doMulWide]>; 507 508 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), 509 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; 510 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), 511 (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>, 512 Requires<[doMulWide]>; 513 514 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), 515 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; 516 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), 517 (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>, 518 Requires<[doMulWide]>; 519 520 defm MULT : I3<"mul.lo.s", mul>; 521 522 defm MULTHS : I3_noi8<"mul.hi.s", mulhs>; 523 defm MULTHU : I3_noi8<"mul.hi.u", mulhu>; 524 def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 525 !strconcat("{{ \n\t", 526 !strconcat(".reg \t.s16 temp1; \n\t", 527 !strconcat(".reg \t.s16 temp2; \n\t", 528 !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", 529 !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t", 530 !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", 531 !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", 532 !strconcat("}}", "")))))))), 533 [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>; 534 def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), 535 !strconcat("{{ \n\t", 536 !strconcat(".reg \t.s16 temp1; \n\t", 537 !strconcat(".reg \t.s16 temp2; \n\t", 538 !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", 539 !strconcat("mov.b16 \ttemp2, $b; \n\t", 540 !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t", 541 !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", 542 !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", 543 !strconcat("}}", ""))))))))), 544 [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>; 545 def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 546 !strconcat("{{ \n\t", 547 !strconcat(".reg \t.u16 temp1; \n\t", 548 !strconcat(".reg \t.u16 temp2; \n\t", 549 !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", 550 !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t", 551 !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", 552 !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", 553 !strconcat("}}", "")))))))), 554 [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>; 555 def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), 556 !strconcat("{{ \n\t", 557 !strconcat(".reg \t.u16 temp1; \n\t", 558 !strconcat(".reg \t.u16 temp2; \n\t", 559 !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", 560 !strconcat("mov.b16 \ttemp2, $b; \n\t", 561 !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t", 562 !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", 563 !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", 564 !strconcat("}}", ""))))))))), 565 [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>; 566 567 568 defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">; 569 defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">; 570 571 defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">; 572 // The ri version will not be selected as DAGCombiner::visitSREM will lower it. 573 defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">; 574 // The ri version will not be selected as DAGCombiner::visitUREM will lower it. 575 576 def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst), 577 (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c), 578 "mad.lo.s16 \t$dst, $a, $b, $c;", 579 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), 580 Int8Regs:$c))]>; 581 def MAD8rri : NVPTXInst<(outs Int8Regs:$dst), 582 (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c), 583 "mad.lo.s16 \t$dst, $a, $b, $c;", 584 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), 585 imm:$c))]>; 586 def MAD8rir : NVPTXInst<(outs Int8Regs:$dst), 587 (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c), 588 "mad.lo.s16 \t$dst, $a, $b, $c;", 589 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), 590 Int8Regs:$c))]>; 591 def MAD8rii : NVPTXInst<(outs Int8Regs:$dst), 592 (ins Int8Regs:$a, i8imm:$b, i8imm:$c), 593 "mad.lo.s16 \t$dst, $a, $b, $c;", 594 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), 595 imm:$c))]>; 596 597 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), 598 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), 599 "mad.lo.s16 \t$dst, $a, $b, $c;", 600 [(set Int16Regs:$dst, (add 601 (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>; 602 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), 603 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), 604 "mad.lo.s16 \t$dst, $a, $b, $c;", 605 [(set Int16Regs:$dst, (add 606 (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>; 607 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), 608 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), 609 "mad.lo.s16 \t$dst, $a, $b, $c;", 610 [(set Int16Regs:$dst, (add 611 (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>; 612 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), 613 (ins Int16Regs:$a, i16imm:$b, i16imm:$c), 614 "mad.lo.s16 \t$dst, $a, $b, $c;", 615 [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b), 616 imm:$c))]>; 617 618 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), 619 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), 620 "mad.lo.s32 \t$dst, $a, $b, $c;", 621 [(set Int32Regs:$dst, (add 622 (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>; 623 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), 624 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), 625 "mad.lo.s32 \t$dst, $a, $b, $c;", 626 [(set Int32Regs:$dst, (add 627 (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>; 628 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), 629 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), 630 "mad.lo.s32 \t$dst, $a, $b, $c;", 631 [(set Int32Regs:$dst, (add 632 (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>; 633 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), 634 (ins Int32Regs:$a, i32imm:$b, i32imm:$c), 635 "mad.lo.s32 \t$dst, $a, $b, $c;", 636 [(set Int32Regs:$dst, (add 637 (mul Int32Regs:$a, imm:$b), imm:$c))]>; 638 639 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), 640 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), 641 "mad.lo.s64 \t$dst, $a, $b, $c;", 642 [(set Int64Regs:$dst, (add 643 (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>; 644 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), 645 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), 646 "mad.lo.s64 \t$dst, $a, $b, $c;", 647 [(set Int64Regs:$dst, (add 648 (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>; 649 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), 650 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), 651 "mad.lo.s64 \t$dst, $a, $b, $c;", 652 [(set Int64Regs:$dst, (add 653 (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>; 654 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), 655 (ins Int64Regs:$a, i64imm:$b, i64imm:$c), 656 "mad.lo.s64 \t$dst, $a, $b, $c;", 657 [(set Int64Regs:$dst, (add 658 (mul Int64Regs:$a, imm:$b), imm:$c))]>; 659 660 661 def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), 662 !strconcat("cvt.s16.s8 \t$dst, $src;\n\t", 663 "neg.s16 \t$dst, $dst;"), 664 [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>; 665 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), 666 "neg.s16 \t$dst, $src;", 667 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; 668 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), 669 "neg.s32 \t$dst, $src;", 670 [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>; 671 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), 672 "neg.s64 \t$dst, $src;", 673 [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; 674 675 //----------------------------------- 676 // Floating Point Arithmetic 677 //----------------------------------- 678 679 // Constant 1.0f 680 def FloatConst1 : PatLeaf<(fpimm), [{ 681 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle) 682 return false; 683 float f = (float)N->getValueAPF().convertToFloat(); 684 return (f==1.0f); 685 }]>; 686 // Constand (double)1.0 687 def DoubleConst1 : PatLeaf<(fpimm), [{ 688 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble) 689 return false; 690 double d = (double)N->getValueAPF().convertToDouble(); 691 return (d==1.0); 692 }]>; 693 694 defm FADD : F3<"add", fadd>; 695 defm FSUB : F3<"sub", fsub>; 696 defm FMUL : F3<"mul", fmul>; 697 698 defm FADD_rn : F3_rn<"add", fadd>; 699 defm FSUB_rn : F3_rn<"sub", fsub>; 700 defm FMUL_rn : F3_rn<"mul", fmul>; 701 702 defm FABS : F2<"abs", fabs>; 703 defm FNEG : F2<"neg", fneg>; 704 defm FSQRT : F2<"sqrt.rn", fsqrt>; 705 706 // 707 // F64 division 708 // 709 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst), 710 (ins f64imm:$a, Float64Regs:$b), 711 "rcp.rn.f64 \t$dst, $b;", 712 [(set Float64Regs:$dst, 713 (fdiv DoubleConst1:$a, Float64Regs:$b))]>; 714 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst), 715 (ins Float64Regs:$a, Float64Regs:$b), 716 "div.rn.f64 \t$dst, $a, $b;", 717 [(set Float64Regs:$dst, 718 (fdiv Float64Regs:$a, Float64Regs:$b))]>; 719 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst), 720 (ins Float64Regs:$a, f64imm:$b), 721 "div.rn.f64 \t$dst, $a, $b;", 722 [(set Float64Regs:$dst, 723 (fdiv Float64Regs:$a, fpimm:$b))]>; 724 725 // 726 // F32 Approximate reciprocal 727 // 728 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst), 729 (ins f32imm:$a, Float32Regs:$b), 730 "rcp.approx.ftz.f32 \t$dst, $b;", 731 [(set Float32Regs:$dst, 732 (fdiv FloatConst1:$a, Float32Regs:$b))]>, 733 Requires<[do_DIVF32_APPROX, doF32FTZ]>; 734 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst), 735 (ins f32imm:$a, Float32Regs:$b), 736 "rcp.approx.f32 \t$dst, $b;", 737 [(set Float32Regs:$dst, 738 (fdiv FloatConst1:$a, Float32Regs:$b))]>, 739 Requires<[do_DIVF32_APPROX]>; 740 // 741 // F32 Approximate division 742 // 743 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst), 744 (ins Float32Regs:$a, Float32Regs:$b), 745 "div.approx.ftz.f32 \t$dst, $a, $b;", 746 [(set Float32Regs:$dst, 747 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 748 Requires<[do_DIVF32_APPROX, doF32FTZ]>; 749 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), 750 (ins Float32Regs:$a, Float32Regs:$b), 751 "div.approx.f32 \t$dst, $a, $b;", 752 [(set Float32Regs:$dst, 753 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 754 Requires<[do_DIVF32_APPROX]>; 755 // 756 // F32 Semi-accurate reciprocal 757 // 758 // rcp.approx gives the same result as div.full(1.0f, a) and is faster. 759 // 760 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst), 761 (ins f32imm:$a, Float32Regs:$b), 762 "rcp.approx.ftz.f32 \t$dst, $b;", 763 [(set Float32Regs:$dst, 764 (fdiv FloatConst1:$a, Float32Regs:$b))]>, 765 Requires<[do_DIVF32_FULL, doF32FTZ]>; 766 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst), 767 (ins f32imm:$a, Float32Regs:$b), 768 "rcp.approx.f32 \t$dst, $b;", 769 [(set Float32Regs:$dst, 770 (fdiv FloatConst1:$a, Float32Regs:$b))]>, 771 Requires<[do_DIVF32_FULL]>; 772 // 773 // F32 Semi-accurate division 774 // 775 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), 776 (ins Float32Regs:$a, Float32Regs:$b), 777 "div.full.ftz.f32 \t$dst, $a, $b;", 778 [(set Float32Regs:$dst, 779 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 780 Requires<[do_DIVF32_FULL, doF32FTZ]>; 781 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), 782 (ins Float32Regs:$a, f32imm:$b), 783 "div.full.ftz.f32 \t$dst, $a, $b;", 784 [(set Float32Regs:$dst, 785 (fdiv Float32Regs:$a, fpimm:$b))]>, 786 Requires<[do_DIVF32_FULL, doF32FTZ]>; 787 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst), 788 (ins Float32Regs:$a, Float32Regs:$b), 789 "div.full.f32 \t$dst, $a, $b;", 790 [(set Float32Regs:$dst, 791 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 792 Requires<[do_DIVF32_FULL]>; 793 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst), 794 (ins Float32Regs:$a, f32imm:$b), 795 "div.full.f32 \t$dst, $a, $b;", 796 [(set Float32Regs:$dst, 797 (fdiv Float32Regs:$a, fpimm:$b))]>, 798 Requires<[do_DIVF32_FULL]>; 799 // 800 // F32 Accurate reciprocal 801 // 802 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), 803 (ins f32imm:$a, Float32Regs:$b), 804 "rcp.rn.ftz.f32 \t$dst, $b;", 805 [(set Float32Regs:$dst, 806 (fdiv FloatConst1:$a, Float32Regs:$b))]>, 807 Requires<[reqPTX20, doF32FTZ]>; 808 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst), 809 (ins f32imm:$a, Float32Regs:$b), 810 "rcp.rn.f32 \t$dst, $b;", 811 [(set Float32Regs:$dst, 812 (fdiv FloatConst1:$a, Float32Regs:$b))]>, 813 Requires<[reqPTX20]>; 814 // 815 // F32 Accurate division 816 // 817 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), 818 (ins Float32Regs:$a, Float32Regs:$b), 819 "div.rn.ftz.f32 \t$dst, $a, $b;", 820 [(set Float32Regs:$dst, 821 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 822 Requires<[doF32FTZ, reqPTX20]>; 823 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), 824 (ins Float32Regs:$a, f32imm:$b), 825 "div.rn.ftz.f32 \t$dst, $a, $b;", 826 [(set Float32Regs:$dst, 827 (fdiv Float32Regs:$a, fpimm:$b))]>, 828 Requires<[doF32FTZ, reqPTX20]>; 829 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst), 830 (ins Float32Regs:$a, Float32Regs:$b), 831 "div.rn.f32 \t$dst, $a, $b;", 832 [(set Float32Regs:$dst, 833 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 834 Requires<[reqPTX20]>; 835 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst), 836 (ins Float32Regs:$a, f32imm:$b), 837 "div.rn.f32 \t$dst, $a, $b;", 838 [(set Float32Regs:$dst, 839 (fdiv Float32Regs:$a, fpimm:$b))]>, 840 Requires<[reqPTX20]>; 841 842 843 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> { 844 def rrr : NVPTXInst<(outs Float32Regs:$dst), 845 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), 846 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 847 [(set Float32Regs:$dst, (fadd 848 (fmul Float32Regs:$a, Float32Regs:$b), 849 Float32Regs:$c))]>, Requires<[Pred]>; 850 // This is to WAR a weird bug in Tablegen that does not automatically 851 // generate the following permutated rule rrr2 from the above rrr. 852 // So we explicitly add it here. This happens to FMA32 only. 853 // See the comments at FMAD32 and FMA32 for more information. 854 def rrr2 : NVPTXInst<(outs Float32Regs:$dst), 855 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), 856 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 857 [(set Float32Regs:$dst, (fadd Float32Regs:$c, 858 (fmul Float32Regs:$a, Float32Regs:$b)))]>, 859 Requires<[Pred]>; 860 def rri : NVPTXInst<(outs Float32Regs:$dst), 861 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), 862 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 863 [(set Float32Regs:$dst, (fadd 864 (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>, 865 Requires<[Pred]>; 866 def rir : NVPTXInst<(outs Float32Regs:$dst), 867 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), 868 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 869 [(set Float32Regs:$dst, (fadd 870 (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>, 871 Requires<[Pred]>; 872 def rii : NVPTXInst<(outs Float32Regs:$dst), 873 (ins Float32Regs:$a, f32imm:$b, f32imm:$c), 874 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 875 [(set Float32Regs:$dst, (fadd 876 (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>, 877 Requires<[Pred]>; 878 } 879 880 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { 881 def rrr : NVPTXInst<(outs Float64Regs:$dst), 882 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), 883 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 884 [(set Float64Regs:$dst, (fadd 885 (fmul Float64Regs:$a, Float64Regs:$b), 886 Float64Regs:$c))]>, Requires<[Pred]>; 887 def rri : NVPTXInst<(outs Float64Regs:$dst), 888 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), 889 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 890 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a, 891 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>; 892 def rir : NVPTXInst<(outs Float64Regs:$dst), 893 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), 894 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 895 [(set Float64Regs:$dst, (fadd 896 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>, 897 Requires<[Pred]>; 898 def rii : NVPTXInst<(outs Float64Regs:$dst), 899 (ins Float64Regs:$a, f64imm:$b, f64imm:$c), 900 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 901 [(set Float64Regs:$dst, (fadd 902 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>, 903 Requires<[Pred]>; 904 } 905 906 // Due to a unknown reason (most likely a bug in tablegen), tablegen does not 907 // automatically generate the rrr2 rule from 908 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32. 909 // If we reverse the order of the following two lines, then rrr2 rule will be 910 // generated for FMA32, but not for rrr. 911 // Therefore, we manually write the rrr2 rule in FPCONTRACT32. 912 defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>; 913 defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>; 914 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>; 915 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>; 916 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>; 917 918 // b*c-a => fmad(b, c, -a) 919 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> { 920 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), 921 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, 922 Requires<[Pred]>; 923 } 924 925 // a-b*c => fmad(-b,c, a) 926 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c 927 // b*c-a => fmad(b, c, -a) 928 // - legal because b*c-a <=> b*c+(-a) 929 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 930 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)), 931 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>, 932 Requires<[Pred]>; 933 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), 934 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, 935 Requires<[Pred]>; 936 } 937 938 // a-b*c => fmad(-b,c, a) 939 // b*c-a => fmad(b, c, -a) 940 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 941 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)), 942 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>, 943 Requires<[Pred]>; 944 945 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a), 946 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>, 947 Requires<[Pred]>; 948 } 949 950 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>; 951 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>; 952 defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>; 953 defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>; 954 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>; 955 956 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), 957 "sin.approx.f32 \t$dst, $src;", 958 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; 959 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), 960 "cos.approx.f32 \t$dst, $src;", 961 [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>; 962 963 //----------------------------------- 964 // Logical Arithmetic 965 //----------------------------------- 966 967 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> { 968 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), 969 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), 970 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; 971 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), 972 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), 973 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; 974 def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 975 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), 976 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; 977 def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), 978 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), 979 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; 980 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), 981 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), 982 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 983 Int16Regs:$b))]>; 984 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), 985 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), 986 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; 987 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), 988 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), 989 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 990 Int32Regs:$b))]>; 991 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 992 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), 993 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 994 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), 995 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), 996 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 997 Int64Regs:$b))]>; 998 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), 999 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), 1000 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; 1001 } 1002 1003 defm OR : LOG_FORMAT<"or", or>; 1004 defm AND : LOG_FORMAT<"and", and>; 1005 defm XOR : LOG_FORMAT<"xor", xor>; 1006 1007 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), 1008 "not.pred \t$dst, $src;", 1009 [(set Int1Regs:$dst, (not Int1Regs:$src))]>; 1010 def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), 1011 "not.b16 \t$dst, $src;", 1012 [(set Int8Regs:$dst, (not Int8Regs:$src))]>; 1013 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), 1014 "not.b16 \t$dst, $src;", 1015 [(set Int16Regs:$dst, (not Int16Regs:$src))]>; 1016 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), 1017 "not.b32 \t$dst, $src;", 1018 [(set Int32Regs:$dst, (not Int32Regs:$src))]>; 1019 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), 1020 "not.b64 \t$dst, $src;", 1021 [(set Int64Regs:$dst, (not Int64Regs:$src))]>; 1022 1023 // For shifts, the second src operand must be 32-bit value 1024 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> { 1025 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, 1026 Int32Regs:$b), 1027 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1028 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1029 Int32Regs:$b))]>; 1030 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), 1031 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1032 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1033 (i32 imm:$b)))]>; 1034 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, 1035 Int32Regs:$b), 1036 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1037 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1038 Int32Regs:$b))]>; 1039 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 1040 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1041 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1042 (i32 imm:$b)))]>; 1043 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), 1044 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1045 [(set Int32Regs:$dst, (OpNode (i32 imm:$a), 1046 (i32 imm:$b)))]>; 1047 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, 1048 Int32Regs:$b), 1049 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1050 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 1051 Int32Regs:$b))]>; 1052 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), 1053 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1054 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 1055 (i32 imm:$b)))]>; 1056 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b), 1057 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1058 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, 1059 Int32Regs:$b))]>; 1060 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b), 1061 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1062 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, 1063 (i32 imm:$b)))]>; 1064 } 1065 1066 defm SHL : LSHIFT_FORMAT<"shl.b", shl>; 1067 1068 // For shifts, the second src operand must be 32-bit value 1069 // Need to add cvt for the 8-bits. 1070 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> { 1071 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, 1072 Int32Regs:$b), 1073 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1074 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1075 Int32Regs:$b))]>; 1076 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), 1077 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1078 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 1079 (i32 imm:$b)))]>; 1080 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, 1081 Int32Regs:$b), 1082 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1083 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1084 Int32Regs:$b))]>; 1085 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 1086 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1087 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, 1088 (i32 imm:$b)))]>; 1089 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), 1090 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1091 [(set Int32Regs:$dst, (OpNode (i32 imm:$a), 1092 (i32 imm:$b)))]>; 1093 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, 1094 Int32Regs:$b), 1095 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1096 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 1097 Int32Regs:$b))]>; 1098 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), 1099 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1100 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, 1101 (i32 imm:$b)))]>; 1102 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b), 1103 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t", 1104 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))), 1105 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, 1106 Int32Regs:$b))]>; 1107 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b), 1108 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t", 1109 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))), 1110 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, 1111 (i32 imm:$b)))]>; 1112 } 1113 1114 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">; 1115 defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">; 1116 1117 // 32bit 1118 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), 1119 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), 1120 !strconcat("{{\n\t", 1121 !strconcat(".reg .b32 %lhs;\n\t", 1122 !strconcat(".reg .b32 %rhs;\n\t", 1123 !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t", 1124 !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t", 1125 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", 1126 !strconcat("}}", ""))))))), 1127 []>; 1128 1129 def SUB_FRM_32 : SDNodeXForm<imm, [{ 1130 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32); 1131 }]>; 1132 1133 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), 1134 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>; 1135 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), 1136 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>; 1137 1138 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, 1139 Int32Regs:$amt), 1140 !strconcat("{{\n\t", 1141 !strconcat(".reg .b32 %lhs;\n\t", 1142 !strconcat(".reg .b32 %rhs;\n\t", 1143 !strconcat(".reg .b32 %amt2;\n\t", 1144 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t", 1145 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", 1146 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", 1147 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", 1148 !strconcat("}}", ""))))))))), 1149 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>; 1150 1151 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, 1152 Int32Regs:$amt), 1153 !strconcat("{{\n\t", 1154 !strconcat(".reg .b32 %lhs;\n\t", 1155 !strconcat(".reg .b32 %rhs;\n\t", 1156 !strconcat(".reg .b32 %amt2;\n\t", 1157 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t", 1158 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", 1159 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t", 1160 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", 1161 !strconcat("}}", ""))))))))), 1162 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>; 1163 1164 // 64bit 1165 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, 1166 i32imm:$amt1, i32imm:$amt2), 1167 !strconcat("{{\n\t", 1168 !strconcat(".reg .b64 %lhs;\n\t", 1169 !strconcat(".reg .b64 %rhs;\n\t", 1170 !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t", 1171 !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t", 1172 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", 1173 !strconcat("}}", ""))))))), 1174 []>; 1175 1176 def SUB_FRM_64 : SDNodeXForm<imm, [{ 1177 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32); 1178 }]>; 1179 1180 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), 1181 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; 1182 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), 1183 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; 1184 1185 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, 1186 Int32Regs:$amt), 1187 !strconcat("{{\n\t", 1188 !strconcat(".reg .b64 %lhs;\n\t", 1189 !strconcat(".reg .b64 %rhs;\n\t", 1190 !strconcat(".reg .u32 %amt2;\n\t", 1191 !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t", 1192 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", 1193 !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t", 1194 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", 1195 !strconcat("}}", ""))))))))), 1196 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; 1197 1198 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, 1199 Int32Regs:$amt), 1200 !strconcat("{{\n\t", 1201 !strconcat(".reg .b64 %lhs;\n\t", 1202 !strconcat(".reg .b64 %rhs;\n\t", 1203 !strconcat(".reg .u32 %amt2;\n\t", 1204 !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t", 1205 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", 1206 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t", 1207 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", 1208 !strconcat("}}", ""))))))))), 1209 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; 1210 1211 1212 //----------------------------------- 1213 // Data Movement (Load / Store, Move) 1214 //----------------------------------- 1215 1216 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], 1217 [SDNPWantRoot]>; 1218 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex], 1219 [SDNPWantRoot]>; 1220 1221 def MEMri : Operand<i32> { 1222 let PrintMethod = "printMemOperand"; 1223 let MIOperandInfo = (ops Int32Regs, i32imm); 1224 } 1225 def MEMri64 : Operand<i64> { 1226 let PrintMethod = "printMemOperand"; 1227 let MIOperandInfo = (ops Int64Regs, i64imm); 1228 } 1229 1230 def imem : Operand<iPTR> { 1231 let PrintMethod = "printOperand"; 1232 } 1233 1234 def imemAny : Operand<iPTRAny> { 1235 let PrintMethod = "printOperand"; 1236 } 1237 1238 def LdStCode : Operand<i32> { 1239 let PrintMethod = "printLdStCode"; 1240 } 1241 1242 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; 1243 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; 1244 1245 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), 1246 "mov.u32 \t$dst, $a;", 1247 [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; 1248 1249 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), 1250 "mov.u64 \t$dst, $a;", 1251 [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; 1252 1253 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp 1254 let IsSimpleMove=1 in { 1255 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), 1256 "mov.pred \t$dst, $sss;", []>; 1257 def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss), 1258 "mov.u16 \t$dst, $sss;", []>; 1259 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), 1260 "mov.u16 \t$dst, $sss;", []>; 1261 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), 1262 "mov.u32 \t$dst, $sss;", []>; 1263 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), 1264 "mov.u64 \t$dst, $sss;", []>; 1265 1266 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), 1267 "mov.f32 \t$dst, $src;", []>; 1268 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), 1269 "mov.f64 \t$dst, $src;", []>; 1270 } 1271 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), 1272 "mov.pred \t$dst, $src;", 1273 [(set Int1Regs:$dst, imm:$src)]>; 1274 def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src), 1275 "mov.u16 \t$dst, $src;", 1276 [(set Int8Regs:$dst, imm:$src)]>; 1277 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), 1278 "mov.u16 \t$dst, $src;", 1279 [(set Int16Regs:$dst, imm:$src)]>; 1280 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), 1281 "mov.u32 \t$dst, $src;", 1282 [(set Int32Regs:$dst, imm:$src)]>; 1283 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), 1284 "mov.u64 \t$dst, $src;", 1285 [(set Int64Regs:$dst, imm:$src)]>; 1286 1287 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), 1288 "mov.f32 \t$dst, $src;", 1289 [(set Float32Regs:$dst, fpimm:$src)]>; 1290 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), 1291 "mov.f64 \t$dst, $src;", 1292 [(set Float64Regs:$dst, fpimm:$src)]>; 1293 1294 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; 1295 1296 //---- Copy Frame Index ---- 1297 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), 1298 "add.u32 \t$dst, ${addr:add};", 1299 [(set Int32Regs:$dst, ADDRri:$addr)]>; 1300 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), 1301 "add.u64 \t$dst, ${addr:add};", 1302 [(set Int64Regs:$dst, ADDRri64:$addr)]>; 1303 1304 //----------------------------------- 1305 // Comparison and Selection 1306 //----------------------------------- 1307 1308 // Generate string block like 1309 // { 1310 // .reg .pred p; 1311 // setp.gt.s16 p, %a, %b; 1312 // selp.s16 %dst, -1, 0, p; 1313 // } 1314 // when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b 1315 class Set_Str<string OpcStr, string sz1, string sz2, string d, string a, 1316 string b> { 1317 string t1 = "{{\n\t.reg .pred p;\n\t"; 1318 string t2 = !strconcat(t1 , OpcStr); 1319 string t3 = !strconcat(t2 , sz1); 1320 string t4 = !strconcat(t3 , " \tp, "); 1321 string t5 = !strconcat(t4 , a); 1322 string t6 = !strconcat(t5 , ", "); 1323 string t7 = !strconcat(t6 , b); 1324 string t8 = !strconcat(t7 , ";\n\tselp.s"); 1325 string t9 = !strconcat(t8 , sz2); 1326 string t10 = !strconcat(t9, " \t"); 1327 string t11 = !strconcat(t10, d); 1328 string s = !strconcat(t11, ", -1, 0, p;\n\t}}"); 1329 } 1330 1331 // Generate string block like 1332 // { 1333 // .reg .pred p; 1334 // .reg .s16 %temp1; 1335 // .reg .s16 %temp2; 1336 // cvt.s16.s8 %temp1, %a; 1337 // cvt s16.s8 %temp1, %b; 1338 // setp.gt.s16 p, %temp1, %temp2; 1339 // selp.s16 %dst, -1, 0, p; 1340 // } 1341 // when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8 1342 class Set_Stri8<string OpcStr, string d, string a, string b, string type, 1343 string cvt> { 1344 string t1 = "{{\n\t.reg .pred p;\n\t"; 1345 string t2 = !strconcat(t1, ".reg ."); 1346 string t3 = !strconcat(t2, type); 1347 string t4 = !strconcat(t3, " %temp1;\n\t"); 1348 string t5 = !strconcat(t4, ".reg ."); 1349 string t6 = !strconcat(t5, type); 1350 string t7 = !strconcat(t6, " %temp2;\n\t"); 1351 string t8 = !strconcat(t7, cvt); 1352 string t9 = !strconcat(t8, " \t%temp1, "); 1353 string t10 = !strconcat(t9, a); 1354 string t11 = !strconcat(t10, ";\n\t"); 1355 string t12 = !strconcat(t11, cvt); 1356 string t13 = !strconcat(t12, " \t%temp2, "); 1357 string t14 = !strconcat(t13, b); 1358 string t15 = !strconcat(t14, ";\n\t"); 1359 string t16 = !strconcat(t15, OpcStr); 1360 string t17 = !strconcat(t16, "16"); 1361 string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t"); 1362 string t19 = !strconcat(t18, "selp.s16 \t"); 1363 string t20 = !strconcat(t19, d); 1364 string s = !strconcat(t20, ", -1, 0, p;\n\t}}"); 1365 } 1366 1367 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode, 1368 string TypeStr, string CVTStr> { 1369 def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 1370 Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s, 1371 []>; 1372 def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, 1373 Int16Regs:$b), 1374 Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s, 1375 []>; 1376 def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, 1377 Int32Regs:$b), 1378 Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s, 1379 []>; 1380 def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, 1381 Int64Regs:$b), 1382 Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s, 1383 []>; 1384 1385 def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 1386 Handle_i8rr<OpcStr, TypeStr, CVTStr>.s, 1387 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; 1388 def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b), 1389 Handle_i8ri<OpcStr, TypeStr, CVTStr>.s, 1390 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; 1391 def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b), 1392 Handle_i8ir<OpcStr, TypeStr, CVTStr>.s, 1393 [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; 1394 def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), 1395 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1396 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; 1397 def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b), 1398 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1399 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; 1400 def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b), 1401 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), 1402 [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; 1403 def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), 1404 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1405 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; 1406 def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 1407 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1408 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 1409 def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b), 1410 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), 1411 [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; 1412 def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), 1413 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1414 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; 1415 def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b), 1416 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1417 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; 1418 def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b), 1419 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), 1420 [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; 1421 1422 def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), 1423 Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s, 1424 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; 1425 def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b), 1426 Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s, 1427 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; 1428 def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b), 1429 Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s, 1430 [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; 1431 def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, 1432 Int16Regs:$b), 1433 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), 1434 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; 1435 def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), 1436 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), 1437 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; 1438 def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b), 1439 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), 1440 [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; 1441 def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, 1442 Int32Regs:$b), 1443 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), 1444 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; 1445 def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), 1446 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), 1447 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; 1448 def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b), 1449 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), 1450 [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; 1451 def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, 1452 Int64Regs:$b), 1453 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), 1454 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; 1455 def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b), 1456 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), 1457 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; 1458 def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b), 1459 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), 1460 [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; 1461 } 1462 1463 multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> { 1464 def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, 1465 Float32Regs:$b), 1466 Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s, 1467 []>, Requires<[doF32FTZ]>; 1468 def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, 1469 Float32Regs:$b), 1470 Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s, 1471 []>; 1472 def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a, 1473 Float64Regs:$b), 1474 Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s, 1475 []>; 1476 def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, 1477 Float64Regs:$b), 1478 Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s, 1479 []>; 1480 1481 def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a 1482 , Float32Regs:$b), 1483 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), 1484 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]> 1485 , Requires<[doF32FTZ]>; 1486 def f32rr_p: NVPTXInst<(outs Int1Regs:$dst), 1487 (ins Float32Regs:$a, Float32Regs:$b), 1488 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), 1489 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; 1490 def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst), 1491 (ins Float32Regs:$a, f32imm:$b), 1492 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), 1493 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, 1494 Requires<[doF32FTZ]>; 1495 def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b), 1496 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), 1497 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; 1498 def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst), 1499 (ins f32imm:$a, Float32Regs:$b), 1500 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), 1501 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, 1502 Requires<[doF32FTZ]>; 1503 def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b), 1504 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), 1505 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; 1506 def f64rr_p: NVPTXInst<(outs Int1Regs:$dst), 1507 (ins Float64Regs:$a, Float64Regs:$b), 1508 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), 1509 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; 1510 def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b), 1511 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), 1512 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; 1513 def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b), 1514 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), 1515 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; 1516 1517 def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), 1518 (ins Float32Regs:$a, Float32Regs:$b), 1519 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), 1520 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; 1521 def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst), 1522 (ins Float32Regs:$a, Float32Regs:$b), 1523 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), 1524 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; 1525 def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), 1526 (ins Float32Regs:$a, f32imm:$b), 1527 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), 1528 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; 1529 def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst), 1530 (ins Float32Regs:$a, f32imm:$b), 1531 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), 1532 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; 1533 def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), 1534 (ins f32imm:$a, Float32Regs:$b), 1535 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), 1536 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; 1537 def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst), 1538 (ins f32imm:$a, Float32Regs:$b), 1539 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), 1540 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; 1541 def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst), 1542 (ins Float64Regs:$a, Float64Regs:$b), 1543 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), 1544 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; 1545 def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst), 1546 (ins Float64Regs:$a, f64imm:$b), 1547 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), 1548 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; 1549 def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst), 1550 (ins f64imm:$a, Float64Regs:$b), 1551 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), 1552 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; 1553 } 1554 1555 defm ISetSGT 1556 : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">; 1557 defm ISetUGT 1558 : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">; 1559 defm ISetSLT 1560 : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">; 1561 defm ISetULT 1562 : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">; 1563 defm ISetSGE 1564 : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">; 1565 defm ISetUGE 1566 : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">; 1567 defm ISetSLE 1568 : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">; 1569 defm ISetULE 1570 : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">; 1571 defm ISetSEQ 1572 : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">; 1573 defm ISetUEQ 1574 : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">; 1575 defm ISetSNE 1576 : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">; 1577 defm ISetUNE 1578 : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">; 1579 1580 def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), 1581 (ins Int1Regs:$a, Int1Regs:$b), 1582 "xor.pred \t$dst, $a, $b;", 1583 [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; 1584 def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), 1585 (ins Int1Regs:$a, Int1Regs:$b), 1586 "xor.pred \t$dst, $a, $b;", 1587 [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>; 1588 def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), 1589 (ins Int1Regs:$a, Int1Regs:$b), 1590 !strconcat("{{\n\t", 1591 !strconcat(".reg .pred temp;\n\t", 1592 !strconcat("xor.pred \ttemp, $a, $b;\n\t", 1593 !strconcat("not.pred \t$dst, temp;\n\t}}","")))), 1594 [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; 1595 def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), 1596 (ins Int1Regs:$a, Int1Regs:$b), 1597 !strconcat("{{\n\t", 1598 !strconcat(".reg .pred temp;\n\t", 1599 !strconcat("xor.pred \ttemp, $a, $b;\n\t", 1600 !strconcat("not.pred \t$dst, temp;\n\t}}","")))), 1601 [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>; 1602 1603 // Compare 2 i1's and produce a u32 1604 def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), 1605 (ins Int1Regs:$a, Int1Regs:$b), 1606 !strconcat("{{\n\t", 1607 !strconcat(".reg .pred temp;\n\t", 1608 !strconcat("xor.pred \ttemp, $a, $b;\n\t", 1609 !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))), 1610 [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; 1611 def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), 1612 (ins Int1Regs:$a, Int1Regs:$b), 1613 !strconcat("{{\n\t", 1614 !strconcat(".reg .pred temp;\n\t", 1615 !strconcat("xor.pred \ttemp, $a, $b;\n\t", 1616 !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))), 1617 [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; 1618 1619 defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>; 1620 defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>; 1621 defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>; 1622 defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>; 1623 defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>; 1624 defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>; 1625 1626 defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>; 1627 defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>; 1628 defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>; 1629 defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>; 1630 defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>; 1631 defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>; 1632 1633 defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>; 1634 defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>; 1635 1636 def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), 1637 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), 1638 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; 1639 def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst), 1640 (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p), 1641 "selp.b16 \t$dst, $a, $b, $p;", 1642 [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>; 1643 def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst), 1644 (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p), 1645 "selp.b16 \t$dst, $a, $b, $p;", 1646 [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>; 1647 def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst), 1648 (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p), 1649 "selp.b16 \t$dst, $a, $b, $p;", 1650 [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>; 1651 def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst), 1652 (ins i8imm:$a, i8imm:$b, Int1Regs:$p), 1653 "selp.b16 \t$dst, $a, $b, $p;", 1654 [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; 1655 1656 def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst), 1657 (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p), 1658 "selp.b16 \t$dst, $a, $b, $p;", 1659 [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>; 1660 def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst), 1661 (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p), 1662 "selp.b16 \t$dst, $a, $b, $p;", 1663 [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>; 1664 def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst), 1665 (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p), 1666 "selp.b16 \t$dst, $a, $b, $p;", 1667 [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>; 1668 def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst), 1669 (ins i16imm:$a, i16imm:$b, Int1Regs:$p), 1670 "selp.b16 \t$dst, $a, $b, $p;", 1671 [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; 1672 1673 def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst), 1674 (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p), 1675 "selp.b32 \t$dst, $a, $b, $p;", 1676 [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>; 1677 def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst), 1678 (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p), 1679 "selp.b32 \t$dst, $a, $b, $p;", 1680 [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>; 1681 def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst), 1682 (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p), 1683 "selp.b32 \t$dst, $a, $b, $p;", 1684 [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>; 1685 def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst), 1686 (ins i32imm:$a, i32imm:$b, Int1Regs:$p), 1687 "selp.b32 \t$dst, $a, $b, $p;", 1688 [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; 1689 1690 def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst), 1691 (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p), 1692 "selp.b64 \t$dst, $a, $b, $p;", 1693 [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>; 1694 def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst), 1695 (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p), 1696 "selp.b64 \t$dst, $a, $b, $p;", 1697 [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>; 1698 def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst), 1699 (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p), 1700 "selp.b64 \t$dst, $a, $b, $p;", 1701 [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>; 1702 def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst), 1703 (ins i64imm:$a, i64imm:$b, Int1Regs:$p), 1704 "selp.b64 \t$dst, $a, $b, $p;", 1705 [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; 1706 1707 def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst), 1708 (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p), 1709 "selp.f32 \t$dst, $a, $b, $p;", 1710 [(set Float32Regs:$dst, 1711 (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>; 1712 def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst), 1713 (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p), 1714 "selp.f32 \t$dst, $a, $b, $p;", 1715 [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>; 1716 def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst), 1717 (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p), 1718 "selp.f32 \t$dst, $a, $b, $p;", 1719 [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>; 1720 def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst), 1721 (ins f32imm:$a, f32imm:$b, Int1Regs:$p), 1722 "selp.f32 \t$dst, $a, $b, $p;", 1723 [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; 1724 1725 def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst), 1726 (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p), 1727 "selp.f64 \t$dst, $a, $b, $p;", 1728 [(set Float64Regs:$dst, 1729 (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>; 1730 def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst), 1731 (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p), 1732 "selp.f64 \t$dst, $a, $b, $p;", 1733 [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>; 1734 def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst), 1735 (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p), 1736 "selp.f64 \t$dst, $a, $b, $p;", 1737 [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>; 1738 def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst), 1739 (ins f64imm:$a, f64imm:$b, Int1Regs:$p), 1740 "selp.f64 \t $dst, $a, $b, $p;", 1741 [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; 1742 1743 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, 1744 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 1745 1746 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, 1747 SDTCisInt<2>]>; 1748 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, 1749 SDTCisInt<1>, SDTCisInt<2>]>; 1750 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; 1751 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; 1752 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; 1753 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; 1754 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; 1755 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; 1756 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; 1757 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; 1758 def SDTCallValProfile : SDTypeProfile<1, 0, []>; 1759 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; 1760 def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>; 1761 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; 1762 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; 1763 1764 def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, 1765 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1766 def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam", 1767 SDTDeclareScalarParamProfile, 1768 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1769 def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam", 1770 SDTDeclareParamProfile, 1771 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1772 def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, 1773 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1774 def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, 1775 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; 1776 def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, 1777 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1778 def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, 1779 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1780 def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, 1781 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1782 def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, 1783 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1784 def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, 1785 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1786 def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile, 1787 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1788 def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, 1789 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1790 def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, 1791 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1792 def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, 1793 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1794 def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, 1795 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1796 def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, 1797 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1798 def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, 1799 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1800 def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile, 1801 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1802 def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, 1803 []>; 1804 def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile, 1805 [SDNPHasChain, SDNPSideEffect]>; 1806 def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, 1807 [SDNPHasChain, SDNPSideEffect]>; 1808 def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile, 1809 [SDNPHasChain, SDNPSideEffect]>; 1810 def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", 1811 SDTPseudoUseParamProfile, 1812 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; 1813 def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, 1814 [SDNPHasChain, SDNPSideEffect]>; 1815 1816 class LoadParamMemInst<NVPTXRegClass regclass, string opstr> : 1817 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), 1818 !strconcat(!strconcat("ld.param", opstr), 1819 "\t$dst, [retval0+$b];"), 1820 [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; 1821 1822 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> : 1823 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), 1824 !strconcat(!strconcat("mov", opstr), 1825 "\t$dst, retval$b;"), 1826 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; 1827 1828 class StoreParamInst<NVPTXRegClass regclass, string opstr> : 1829 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), 1830 !strconcat(!strconcat("st.param", opstr), 1831 "\t[param$a+$b], $val;"), 1832 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; 1833 1834 class MoveToParamInst<NVPTXRegClass regclass, string opstr> : 1835 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), 1836 !strconcat(!strconcat("mov", opstr), 1837 "\tparam$a, $val;"), 1838 [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; 1839 1840 class StoreRetvalInst<NVPTXRegClass regclass, string opstr> : 1841 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), 1842 !strconcat(!strconcat("st.param", opstr), 1843 "\t[func_retval0+$a], $val;"), 1844 [(StoreRetval (i32 imm:$a), regclass:$val)]>; 1845 1846 class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> : 1847 NVPTXInst<(outs), (ins i32imm:$num, regclass:$val), 1848 !strconcat(!strconcat("mov", opstr), 1849 "\tfunc_retval$num, $val;"), 1850 [(MoveToRetval (i32 imm:$num), regclass:$val)]>; 1851 1852 class MoveRetvalInst<NVPTXRegClass regclass, string opstr> : 1853 NVPTXInst<(outs), (ins regclass:$val), 1854 !strconcat(!strconcat("mov", opstr), 1855 "\tfunc_retval0, $val;"), 1856 [(MoveRetval regclass:$val)]>; 1857 1858 def PrintCallRetInst1 : NVPTXInst<(outs), (ins), 1859 "call (retval0), ", 1860 [(PrintCall (i32 1))]>; 1861 def PrintCallRetInst2 : NVPTXInst<(outs), (ins), 1862 "call (retval0, retval1), ", 1863 [(PrintCall (i32 2))]>; 1864 def PrintCallRetInst3 : NVPTXInst<(outs), (ins), 1865 "call (retval0, retval1, retval2), ", 1866 [(PrintCall (i32 3))]>; 1867 def PrintCallRetInst4 : NVPTXInst<(outs), (ins), 1868 "call (retval0, retval1, retval2, retval3), ", 1869 [(PrintCall (i32 4))]>; 1870 def PrintCallRetInst5 : NVPTXInst<(outs), (ins), 1871 "call (retval0, retval1, retval2, retval3, retval4), ", 1872 [(PrintCall (i32 5))]>; 1873 def PrintCallRetInst6 : NVPTXInst<(outs), (ins), 1874 "call (retval0, retval1, retval2, retval3, retval4, retval5), ", 1875 [(PrintCall (i32 6))]>; 1876 def PrintCallRetInst7 : NVPTXInst<(outs), (ins), 1877 "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", 1878 [(PrintCall (i32 7))]>; 1879 def PrintCallRetInst8 : NVPTXInst<(outs), (ins), 1880 !strconcat("call (retval0, retval1, retval2, retval3, retval4", 1881 ", retval5, retval6, retval7), "), 1882 [(PrintCall (i32 8))]>; 1883 1884 def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ", 1885 [(PrintCall (i32 0))]>; 1886 1887 def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins), 1888 "call.uni (retval0), ", 1889 [(PrintCallUni (i32 1))]>; 1890 def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins), 1891 "call.uni (retval0, retval1), ", 1892 [(PrintCallUni (i32 2))]>; 1893 def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins), 1894 "call.uni (retval0, retval1, retval2), ", 1895 [(PrintCallUni (i32 3))]>; 1896 def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins), 1897 "call.uni (retval0, retval1, retval2, retval3), ", 1898 [(PrintCallUni (i32 4))]>; 1899 def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins), 1900 "call.uni (retval0, retval1, retval2, retval3, retval4), ", 1901 [(PrintCallUni (i32 5))]>; 1902 def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins), 1903 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ", 1904 [(PrintCallUni (i32 6))]>; 1905 def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins), 1906 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", 1907 [(PrintCallUni (i32 7))]>; 1908 def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins), 1909 !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4", 1910 ", retval5, retval6, retval7), "), 1911 [(PrintCallUni (i32 8))]>; 1912 1913 def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ", 1914 [(PrintCallUni (i32 0))]>; 1915 1916 def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">; 1917 def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">; 1918 def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">; 1919 def LoadParamMemI8 : LoadParamMemInst<Int8Regs, ".b8">; 1920 1921 //def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), 1922 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", 1923 // "cvt.u16.u32\t$dst, temp_param_reg;"), 1924 // [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; 1925 //def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), 1926 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", 1927 // "cvt.u16.u32\t$dst, temp_param_reg;"), 1928 // [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; 1929 1930 def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">; 1931 def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">; 1932 1933 def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">; 1934 def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">; 1935 def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), 1936 "cvt.u16.u32\t$dst, retval$b;", 1937 [(set Int16Regs:$dst, 1938 (LoadParam (i32 0), (i32 imm:$b)))]>; 1939 def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), 1940 "cvt.u16.u32\t$dst, retval$b;", 1941 [(set Int8Regs:$dst, 1942 (LoadParam (i32 0), (i32 imm:$b)))]>; 1943 1944 def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">; 1945 def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">; 1946 1947 def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">; 1948 def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">; 1949 1950 def StoreParamI16 : NVPTXInst<(outs), 1951 (ins Int16Regs:$val, i32imm:$a, i32imm:$b), 1952 "st.param.b16\t[param$a+$b], $val;", 1953 [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; 1954 1955 def StoreParamI8 : NVPTXInst<(outs), 1956 (ins Int8Regs:$val, i32imm:$a, i32imm:$b), 1957 "st.param.b8\t[param$a+$b], $val;", 1958 [(StoreParam 1959 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; 1960 1961 def StoreParamS32I16 : NVPTXInst<(outs), 1962 (ins Int16Regs:$val, i32imm:$a, i32imm:$b), 1963 !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t", 1964 "st.param.b32\t[param$a+$b], temp_param_reg;"), 1965 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; 1966 def StoreParamU32I16 : NVPTXInst<(outs), 1967 (ins Int16Regs:$val, i32imm:$a, i32imm:$b), 1968 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", 1969 "st.param.b32\t[param$a+$b], temp_param_reg;"), 1970 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; 1971 1972 def StoreParamU32I8 : NVPTXInst<(outs), 1973 (ins Int8Regs:$val, i32imm:$a, i32imm:$b), 1974 !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t", 1975 "st.param.b32\t[param$a+$b], temp_param_reg;"), 1976 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; 1977 def StoreParamS32I8 : NVPTXInst<(outs), 1978 (ins Int8Regs:$val, i32imm:$a, i32imm:$b), 1979 !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t", 1980 "st.param.b32\t[param$a+$b], temp_param_reg;"), 1981 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; 1982 1983 def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">; 1984 def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">; 1985 1986 def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">; 1987 def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">; 1988 def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">; 1989 def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">; 1990 def MoveToParamI16 : NVPTXInst<(outs), 1991 (ins Int16Regs:$val, i32imm:$a, i32imm:$b), 1992 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", 1993 "mov.b32\tparam$a, temp_param_reg;"), 1994 [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; 1995 def MoveToParamI8 : NVPTXInst<(outs), 1996 (ins Int8Regs:$val, i32imm:$a, i32imm:$b), 1997 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", 1998 "mov.b32\tparam$a, temp_param_reg;"), 1999 [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; 2000 2001 def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">; 2002 def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">; 2003 def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">; 2004 def StoreRetvalI8 : StoreRetvalInst<Int8Regs, ".b8">; 2005 2006 //def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a), 2007 // !strconcat("\{\n\t", 2008 // !strconcat(".reg .b32 temp_retval_reg;\n\t", 2009 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", 2010 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), 2011 // [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>; 2012 //def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a), 2013 // !strconcat("\{\n\t", 2014 // !strconcat(".reg .b32 temp_retval_reg;\n\t", 2015 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", 2016 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), 2017 // [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>; 2018 2019 def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">; 2020 def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">; 2021 2022 def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">; 2023 def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">; 2024 def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">; 2025 def MoveRetvalI8 : MoveRetvalInst<Int8Regs, ".b8">; 2026 def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">; 2027 def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">; 2028 2029 def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs, ".b64">; 2030 def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs, ".b32">; 2031 def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">; 2032 def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">; 2033 def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val), 2034 "cvt.u32.u16\tfunc_retval$num, $val;", 2035 [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>; 2036 def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val), 2037 "cvt.u32.u16\tfunc_retval$num, $val;", 2038 [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>; 2039 2040 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; 2041 def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; 2042 def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; 2043 def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; 2044 2045 class CallArgInst<NVPTXRegClass regclass> : 2046 NVPTXInst<(outs), (ins regclass:$a), "$a, ", 2047 [(CallArg (i32 0), regclass:$a)]>; 2048 2049 class LastCallArgInst<NVPTXRegClass regclass> : 2050 NVPTXInst<(outs), (ins regclass:$a), "$a", 2051 [(LastCallArg (i32 0), regclass:$a)]>; 2052 2053 def CallArgI64 : CallArgInst<Int64Regs>; 2054 def CallArgI32 : CallArgInst<Int32Regs>; 2055 def CallArgI16 : CallArgInst<Int16Regs>; 2056 def CallArgI8 : CallArgInst<Int8Regs>; 2057 2058 def CallArgF64 : CallArgInst<Float64Regs>; 2059 def CallArgF32 : CallArgInst<Float32Regs>; 2060 2061 def LastCallArgI64 : LastCallArgInst<Int64Regs>; 2062 def LastCallArgI32 : LastCallArgInst<Int32Regs>; 2063 def LastCallArgI16 : LastCallArgInst<Int16Regs>; 2064 def LastCallArgI8 : LastCallArgInst<Int8Regs>; 2065 2066 def LastCallArgF64 : LastCallArgInst<Float64Regs>; 2067 def LastCallArgF32 : LastCallArgInst<Float32Regs>; 2068 2069 def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ", 2070 [(CallArg (i32 0), (i32 imm:$a))]>; 2071 def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a", 2072 [(LastCallArg (i32 0), (i32 imm:$a))]>; 2073 2074 def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", 2075 [(CallArg (i32 1), (i32 imm:$a))]>; 2076 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", 2077 [(LastCallArg (i32 1), (i32 imm:$a))]>; 2078 2079 def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), 2080 "$addr, ", 2081 [(CallVoid (Wrapper tglobaladdr:$addr))]>; 2082 def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), 2083 "$addr, ", 2084 [(CallVoid Int32Regs:$addr)]>; 2085 def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), 2086 "$addr, ", 2087 [(CallVoid Int64Regs:$addr)]>; 2088 def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), 2089 ", prototype_$val;", 2090 [(Prototype (i32 imm:$val))]>; 2091 2092 def DeclareRetMemInst : NVPTXInst<(outs), 2093 (ins i32imm:$align, i32imm:$size, i32imm:$num), 2094 ".param .align $align .b8 retval$num[$size];", 2095 [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; 2096 def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), 2097 ".param .b$size retval$num;", 2098 [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; 2099 def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), 2100 ".reg .b$size retval$num;", 2101 [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; 2102 2103 def DeclareParamInst : NVPTXInst<(outs), 2104 (ins i32imm:$align, i32imm:$a, i32imm:$size), 2105 ".param .align $align .b8 param$a[$size];", 2106 [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; 2107 def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), 2108 ".param .b$size param$a;", 2109 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; 2110 def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), 2111 ".reg .b$size param$a;", 2112 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; 2113 2114 class MoveParamInst<NVPTXRegClass regclass, string asmstr> : 2115 NVPTXInst<(outs regclass:$dst), (ins regclass:$src), 2116 !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"), 2117 [(set regclass:$dst, (MoveParam regclass:$src))]>; 2118 2119 def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">; 2120 def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">; 2121 def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), 2122 "cvt.u16.u32\t$dst, $src;", 2123 [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; 2124 def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), 2125 "cvt.u16.u32\t$dst, $src;", 2126 [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>; 2127 def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">; 2128 def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">; 2129 2130 class PseudoUseParamInst<NVPTXRegClass regclass> : 2131 NVPTXInst<(outs), (ins regclass:$src), 2132 "// Pseudo use of $src", 2133 [(PseudoUseParam regclass:$src)]>; 2134 2135 def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>; 2136 def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>; 2137 def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>; 2138 def PseudoUseParamI8 : PseudoUseParamInst<Int8Regs>; 2139 def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>; 2140 def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>; 2141 2142 2143 // 2144 // Load / Store Handling 2145 // 2146 multiclass LD<NVPTXRegClass regclass> { 2147 def _avar : NVPTXInst<(outs regclass:$dst), 2148 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2149 i32imm:$fromWidth, imem:$addr), 2150 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2151 "$fromWidth \t$dst, [$addr];"), []>; 2152 def _areg : NVPTXInst<(outs regclass:$dst), 2153 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2154 i32imm:$fromWidth, Int32Regs:$addr), 2155 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2156 "$fromWidth \t$dst, [$addr];"), []>; 2157 def _areg_64 : NVPTXInst<(outs regclass:$dst), 2158 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2159 i32imm:$fromWidth, Int64Regs:$addr), 2160 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", 2161 " \t$dst, [$addr];"), []>; 2162 def _ari : NVPTXInst<(outs regclass:$dst), 2163 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2164 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), 2165 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2166 "$fromWidth \t$dst, [$addr+$offset];"), []>; 2167 def _ari_64 : NVPTXInst<(outs regclass:$dst), 2168 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2169 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), 2170 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", 2171 " \t$dst, [$addr+$offset];"), []>; 2172 def _asi : NVPTXInst<(outs regclass:$dst), 2173 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2174 i32imm:$fromWidth, imem:$addr, i32imm:$offset), 2175 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2176 "$fromWidth \t$dst, [$addr+$offset];"), []>; 2177 } 2178 2179 let mayLoad=1, neverHasSideEffects=1 in { 2180 defm LD_i8 : LD<Int8Regs>; 2181 defm LD_i16 : LD<Int16Regs>; 2182 defm LD_i32 : LD<Int32Regs>; 2183 defm LD_i64 : LD<Int64Regs>; 2184 defm LD_f32 : LD<Float32Regs>; 2185 defm LD_f64 : LD<Float64Regs>; 2186 } 2187 2188 multiclass ST<NVPTXRegClass regclass> { 2189 def _avar : NVPTXInst<(outs), 2190 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2191 LdStCode:$Sign, i32imm:$toWidth, imem:$addr), 2192 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2193 " \t[$addr], $src;"), []>; 2194 def _areg : NVPTXInst<(outs), 2195 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2196 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), 2197 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2198 " \t[$addr], $src;"), []>; 2199 def _areg_64 : NVPTXInst<(outs), 2200 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2201 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), 2202 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", 2203 "\t[$addr], $src;"), []>; 2204 def _ari : NVPTXInst<(outs), 2205 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2206 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), 2207 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2208 " \t[$addr+$offset], $src;"), []>; 2209 def _ari_64 : NVPTXInst<(outs), 2210 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2211 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), 2212 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", 2213 "\t[$addr+$offset], $src;"), []>; 2214 def _asi : NVPTXInst<(outs), 2215 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 2216 LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), 2217 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", 2218 " \t[$addr+$offset], $src;"), []>; 2219 } 2220 2221 let mayStore=1, neverHasSideEffects=1 in { 2222 defm ST_i8 : ST<Int8Regs>; 2223 defm ST_i16 : ST<Int16Regs>; 2224 defm ST_i32 : ST<Int32Regs>; 2225 defm ST_i64 : ST<Int64Regs>; 2226 defm ST_f32 : ST<Float32Regs>; 2227 defm ST_f64 : ST<Float64Regs>; 2228 } 2229 2230 // The following is used only in and after vector elementizations. 2231 // Vector elementization happens at the machine instruction level, so the 2232 // following instruction 2233 // never appears in the DAG. 2234 multiclass LD_VEC<NVPTXRegClass regclass> { 2235 def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2236 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2237 i32imm:$fromWidth, imem:$addr), 2238 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2239 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; 2240 def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2241 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2242 i32imm:$fromWidth, Int32Regs:$addr), 2243 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2244 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; 2245 def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2246 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2247 i32imm:$fromWidth, Int64Regs:$addr), 2248 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2249 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; 2250 def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2251 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2252 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), 2253 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2254 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; 2255 def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2256 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2257 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), 2258 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2259 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; 2260 def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 2261 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2262 i32imm:$fromWidth, imem:$addr, i32imm:$offset), 2263 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2264 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; 2265 def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, 2266 regclass:$dst3, regclass:$dst4), 2267 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2268 i32imm:$fromWidth, imem:$addr), 2269 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2270 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; 2271 def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 2272 regclass:$dst4), 2273 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2274 i32imm:$fromWidth, Int32Regs:$addr), 2275 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2276 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; 2277 def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, 2278 regclass:$dst3, regclass:$dst4), 2279 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2280 i32imm:$fromWidth, Int64Regs:$addr), 2281 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2282 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; 2283 def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 2284 regclass:$dst4), 2285 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2286 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), 2287 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2288 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), 2289 []>; 2290 def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, 2291 regclass:$dst3, regclass:$dst4), 2292 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2293 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), 2294 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2295 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), 2296 []>; 2297 def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 2298 regclass:$dst4), 2299 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2300 i32imm:$fromWidth, imem:$addr, i32imm:$offset), 2301 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2302 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), 2303 []>; 2304 } 2305 let mayLoad=1, neverHasSideEffects=1 in { 2306 defm LDV_i8 : LD_VEC<Int8Regs>; 2307 defm LDV_i16 : LD_VEC<Int16Regs>; 2308 defm LDV_i32 : LD_VEC<Int32Regs>; 2309 defm LDV_i64 : LD_VEC<Int64Regs>; 2310 defm LDV_f32 : LD_VEC<Float32Regs>; 2311 defm LDV_f64 : LD_VEC<Float64Regs>; 2312 } 2313 2314 multiclass ST_VEC<NVPTXRegClass regclass> { 2315 def _v2_avar : NVPTXInst<(outs), 2316 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2317 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), 2318 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2319 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; 2320 def _v2_areg : NVPTXInst<(outs), 2321 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2322 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), 2323 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2324 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; 2325 def _v2_areg_64 : NVPTXInst<(outs), 2326 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2327 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), 2328 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2329 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; 2330 def _v2_ari : NVPTXInst<(outs), 2331 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2332 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, 2333 i32imm:$offset), 2334 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2335 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; 2336 def _v2_ari_64 : NVPTXInst<(outs), 2337 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2338 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, 2339 i32imm:$offset), 2340 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2341 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; 2342 def _v2_asi : NVPTXInst<(outs), 2343 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, 2344 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, 2345 i32imm:$offset), 2346 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2347 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; 2348 def _v4_avar : NVPTXInst<(outs), 2349 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2350 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2351 i32imm:$fromWidth, imem:$addr), 2352 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2353 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; 2354 def _v4_areg : NVPTXInst<(outs), 2355 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2356 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2357 i32imm:$fromWidth, Int32Regs:$addr), 2358 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2359 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; 2360 def _v4_areg_64 : NVPTXInst<(outs), 2361 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2362 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2363 i32imm:$fromWidth, Int64Regs:$addr), 2364 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2365 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; 2366 def _v4_ari : NVPTXInst<(outs), 2367 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2368 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2369 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), 2370 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2371 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), 2372 []>; 2373 def _v4_ari_64 : NVPTXInst<(outs), 2374 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2375 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2376 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), 2377 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2378 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), 2379 []>; 2380 def _v4_asi : NVPTXInst<(outs), 2381 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, 2382 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, 2383 i32imm:$fromWidth, imem:$addr, i32imm:$offset), 2384 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", 2385 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), 2386 []>; 2387 } 2388 let mayStore=1, neverHasSideEffects=1 in { 2389 defm STV_i8 : ST_VEC<Int8Regs>; 2390 defm STV_i16 : ST_VEC<Int16Regs>; 2391 defm STV_i32 : ST_VEC<Int32Regs>; 2392 defm STV_i64 : ST_VEC<Int64Regs>; 2393 defm STV_f32 : ST_VEC<Float32Regs>; 2394 defm STV_f64 : ST_VEC<Float64Regs>; 2395 } 2396 2397 2398 //---- Conversion ---- 2399 2400 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> { 2401 // FIXME: need to add f16 support 2402 // def CVTf16i8 : 2403 // NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a), 2404 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"), 2405 // [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>; 2406 // def CVTf16i16 : 2407 // NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a), 2408 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"), 2409 // [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>; 2410 // def CVTf16i32 : 2411 // NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a), 2412 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"), 2413 // [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>; 2414 // def CVTf16i64: 2415 // NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a), 2416 // !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), 2417 // [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; 2418 2419 def CVTf32i1 : 2420 NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a), 2421 "selp.f32 \t$d, 1.0, 0.0, $a;", 2422 [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>; 2423 def CVTf32i8 : 2424 NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a), 2425 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"), 2426 [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>; 2427 def CVTf32i16 : 2428 NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a), 2429 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"), 2430 [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>; 2431 def CVTf32i32 : 2432 NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a), 2433 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"), 2434 [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>; 2435 def CVTf32i64: 2436 NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a), 2437 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), 2438 [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; 2439 2440 def CVTf64i1 : 2441 NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a), 2442 "selp.f64 \t$d, 1.0, 0.0, $a;", 2443 [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>; 2444 def CVTf64i8 : 2445 NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a), 2446 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"), 2447 [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>; 2448 def CVTf64i16 : 2449 NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a), 2450 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"), 2451 [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>; 2452 def CVTf64i32 : 2453 NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a), 2454 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"), 2455 [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>; 2456 def CVTf64i64: 2457 NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a), 2458 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"), 2459 [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>; 2460 } 2461 2462 defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>; 2463 defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>; 2464 2465 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> { 2466 // FIXME: need to add f16 support 2467 // def CVTi8f16: 2468 // NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a), 2469 // !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"), 2470 // [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>; 2471 def CVTi8f32_ftz: 2472 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), 2473 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), 2474 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; 2475 def CVTi8f32: 2476 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), 2477 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), 2478 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>; 2479 def CVTi8f64: 2480 NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a), 2481 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), 2482 [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>; 2483 2484 // FIXME: need to add f16 support 2485 // def CVTi16f16: 2486 // NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a), 2487 // !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"), 2488 // [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>; 2489 def CVTi16f32_ftz: 2490 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), 2491 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), 2492 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; 2493 def CVTi16f32: 2494 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), 2495 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), 2496 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>; 2497 def CVTi16f64: 2498 NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a), 2499 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), 2500 [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>; 2501 2502 // FIXME: need to add f16 support 2503 // def CVTi32f16: def CVTi32f16: 2504 // NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a), 2505 // !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"), 2506 // [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>; 2507 def CVTi32f32_ftz: 2508 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), 2509 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"), 2510 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; 2511 def CVTi32f32: 2512 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), 2513 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"), 2514 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>; 2515 def CVTi32f64: 2516 NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a), 2517 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"), 2518 [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>; 2519 2520 // FIXME: need to add f16 support 2521 // def CVTi64f16: 2522 // NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a), 2523 // !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"), 2524 // [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>; 2525 def CVTi64f32_ftz: 2526 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), 2527 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"), 2528 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; 2529 def CVTi64f32: 2530 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), 2531 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"), 2532 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>; 2533 def CVTi64f64: 2534 NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a), 2535 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"), 2536 [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>; 2537 } 2538 2539 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>; 2540 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>; 2541 2542 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> { 2543 def ext1to8: 2544 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), 2545 "selp.u16 \t$d, 1, 0, $a;", 2546 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; 2547 def ext1to16: 2548 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), 2549 "selp.u16 \t$d, 1, 0, $a;", 2550 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; 2551 def ext1to32: 2552 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), 2553 "selp.u32 \t$d, 1, 0, $a;", 2554 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; 2555 def ext1to64: 2556 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), 2557 "selp.u64 \t$d, 1, 0, $a;", 2558 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; 2559 } 2560 2561 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> { 2562 def ext1to8: 2563 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), 2564 "selp.s16 \t$d, -1, 0, $a;", 2565 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; 2566 def ext1to16: 2567 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), 2568 "selp.s16 \t$d, -1, 0, $a;", 2569 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; 2570 def ext1to32: 2571 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), 2572 "selp.s32 \t$d, -1, 0, $a;", 2573 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; 2574 def ext1to64: 2575 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), 2576 "selp.s64 \t$d, -1, 0, $a;", 2577 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; 2578 } 2579 2580 multiclass INT_EXTEND <string OpStr, SDNode OpNode> { 2581 // All Int8Regs are emiited as 16bit registers in ptx. 2582 // And there is no selp.u8 in ptx. 2583 def ext8to16: 2584 NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a), 2585 !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.", 2586 !strconcat(OpStr, "8 \t$d, $a;")))), 2587 [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>; 2588 def ext8to32: 2589 NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a), 2590 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", 2591 !strconcat(OpStr, "8 \t$d, $a;")))), 2592 [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>; 2593 def ext8to64: 2594 NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a), 2595 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", 2596 !strconcat(OpStr, "8 \t$d, $a;")))), 2597 [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>; 2598 def ext16to32: 2599 NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a), 2600 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", 2601 !strconcat(OpStr, "16 \t$d, $a;")))), 2602 [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>; 2603 def ext16to64: 2604 NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a), 2605 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", 2606 !strconcat(OpStr, "16 \t$d, $a;")))), 2607 [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>; 2608 def ext32to64: 2609 NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a), 2610 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", 2611 !strconcat(OpStr, "32 \t$d, $a;")))), 2612 [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>; 2613 } 2614 2615 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>; 2616 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>; 2617 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>; 2618 2619 defm Sint_extend : INT_EXTEND <"s", sext>; 2620 defm Zint_extend : INT_EXTEND <"u", zext>; 2621 defm Aint_extend : INT_EXTEND <"u", anyext>; 2622 2623 class TRUNC_to1_asm<string sz> { 2624 string s = !strconcat("{{\n\t", 2625 !strconcat(".reg ", 2626 !strconcat(sz, 2627 !strconcat(" temp;\n\t", 2628 !strconcat("and", 2629 !strconcat(sz, 2630 !strconcat("\t temp, $a, 1;\n\t", 2631 !strconcat("setp", 2632 !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}"))))))))); 2633 } 2634 2635 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 2636 "cvt.u32.u64 \t$d, $a;", 2637 [(set Int32Regs:$d, (trunc Int64Regs:$a))]>; 2638 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a), 2639 "cvt.u16.u64 \t$d, $a;", 2640 [(set Int16Regs:$d, (trunc Int64Regs:$a))]>; 2641 def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a), 2642 "cvt.u8.u64 \t$d, $a;", 2643 [(set Int8Regs:$d, (trunc Int64Regs:$a))]>; 2644 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a), 2645 "cvt.u16.u32 \t$d, $a;", 2646 [(set Int16Regs:$d, (trunc Int32Regs:$a))]>; 2647 def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a), 2648 "cvt.u8.u32 \t$d, $a;", 2649 [(set Int8Regs:$d, (trunc Int32Regs:$a))]>; 2650 def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a), 2651 "cvt.u8.u16 \t$d, $a;", 2652 [(set Int8Regs:$d, (trunc Int16Regs:$a))]>; 2653 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2654 TRUNC_to1_asm<".b64">.s, 2655 [(set Int1Regs:$d, (trunc Int64Regs:$a))]>; 2656 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2657 TRUNC_to1_asm<".b32">.s, 2658 [(set Int1Regs:$d, (trunc Int32Regs:$a))]>; 2659 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a), 2660 TRUNC_to1_asm<".b16">.s, 2661 [(set Int1Regs:$d, (trunc Int16Regs:$a))]>; 2662 def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a), 2663 TRUNC_to1_asm<".b16">.s, 2664 [(set Int1Regs:$d, (trunc Int8Regs:$a))]>; 2665 2666 // Select instructions 2667 def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b), 2668 (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>; 2669 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), 2670 (SELECTi16rr Int16Regs:$a, Int16Regs:$b, 2671 (TRUNC_32to1 Int32Regs:$pred))>; 2672 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), 2673 (SELECTi32rr Int32Regs:$a, Int32Regs:$b, 2674 (TRUNC_32to1 Int32Regs:$pred))>; 2675 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), 2676 (SELECTi64rr Int64Regs:$a, Int64Regs:$b, 2677 (TRUNC_32to1 Int32Regs:$pred))>; 2678 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), 2679 (SELECTf32rr Float32Regs:$a, Float32Regs:$b, 2680 (TRUNC_32to1 Int32Regs:$pred))>; 2681 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), 2682 (SELECTf64rr Float64Regs:$a, Float64Regs:$b, 2683 (TRUNC_32to1 Int32Regs:$pred))>; 2684 2685 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn, 2686 NVPTXRegClass regclassOut> : 2687 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), 2688 !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")), 2689 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>; 2690 2691 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>; 2692 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>; 2693 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; 2694 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; 2695 2696 // pack a set of smaller int registers to a larger int register 2697 def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), 2698 (ins Int8Regs:$s1, Int8Regs:$s2, 2699 Int8Regs:$s3, Int8Regs:$s4), 2700 !strconcat("{{\n\t.reg .b8\t%t<4>;", 2701 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", 2702 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", 2703 !strconcat("\n\tcvt.u8.u8\t%t2, $s3;", 2704 !strconcat("\n\tcvt.u8.u8\t%t3, $s4;", 2705 "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))), 2706 []>; 2707 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), 2708 (ins Int16Regs:$s1, Int16Regs:$s2, 2709 Int16Regs:$s3, Int16Regs:$s4), 2710 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", 2711 []>; 2712 def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d), 2713 (ins Int8Regs:$s1, Int8Regs:$s2), 2714 !strconcat("{{\n\t.reg .b8\t%t<2>;", 2715 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", 2716 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", 2717 "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))), 2718 []>; 2719 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), 2720 (ins Int16Regs:$s1, Int16Regs:$s2), 2721 "mov.b32\t$d, {{$s1, $s2}};", 2722 []>; 2723 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), 2724 (ins Int32Regs:$s1, Int32Regs:$s2), 2725 "mov.b64\t$d, {{$s1, $s2}};", 2726 []>; 2727 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), 2728 (ins Float32Regs:$s1, Float32Regs:$s2), 2729 "mov.b64\t$d, {{$s1, $s2}};", 2730 []>; 2731 2732 // unpack a larger int register to a set of smaller int registers 2733 def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2, 2734 Int8Regs:$d3, Int8Regs:$d4), 2735 (ins Int32Regs:$s), 2736 !strconcat("{{\n\t.reg .b8\t%t<4>;", 2737 !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;", 2738 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", 2739 !strconcat("\n\tcvt.u8.u8\t$d2, %t1;", 2740 !strconcat("\n\tcvt.u8.u8\t$d3, %t2;", 2741 "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))), 2742 []>; 2743 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, 2744 Int16Regs:$d3, Int16Regs:$d4), 2745 (ins Int64Regs:$s), 2746 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", 2747 []>; 2748 def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2), 2749 (ins Int16Regs:$s), 2750 !strconcat("{{\n\t.reg .b8\t%t<2>;", 2751 !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;", 2752 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", 2753 "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))), 2754 []>; 2755 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), 2756 (ins Int32Regs:$s), 2757 "mov.b32\t{{$d1, $d2}}, $s;", 2758 []>; 2759 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), 2760 (ins Int64Regs:$s), 2761 "mov.b64\t{{$d1, $d2}}, $s;", 2762 []>; 2763 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), 2764 (ins Float64Regs:$s), 2765 "mov.b64\t{{$d1, $d2}}, $s;", 2766 []>; 2767 2768 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), 2769 "cvt.rn.ftz.f32.f64 \t$d, $a;", 2770 [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>; 2771 2772 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), 2773 "cvt.rn.f32.f64 \t$d, $a;", 2774 [(set Float32Regs:$d, (fround Float64Regs:$a))]>; 2775 2776 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), 2777 "cvt.ftz.f64.f32 \t$d, $a;", 2778 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>; 2779 2780 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), 2781 "cvt.f64.f32 \t$d, $a;", 2782 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>; 2783 2784 def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, 2785 [SDNPHasChain, SDNPOptInGlue]>; 2786 2787 //----------------------------------- 2788 // Control-flow 2789 //----------------------------------- 2790 2791 let isTerminator=1 in { 2792 let isReturn=1, isBarrier=1 in 2793 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>; 2794 2795 let isBranch=1 in 2796 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), 2797 "@$a bra \t$target;", 2798 [(brcond Int1Regs:$a, bb:$target)]>; 2799 let isBranch=1 in 2800 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), 2801 "@!$a bra \t$target;", 2802 []>; 2803 2804 let isBranch=1, isBarrier=1 in 2805 def GOTO : NVPTXInst<(outs), (ins brtarget:$target), 2806 "bra.uni \t$target;", 2807 [(br bb:$target)]>; 2808 } 2809 2810 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch 2811 (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>; 2812 2813 // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a 2814 // conditional branch if 2815 // the target block is the next block so that the code can fall through to the 2816 // target block. 2817 // The invertion is done by 'xor condition, 1', which will be translated to 2818 // (setne condition, -1). 2819 // Since ptx supports '@!pred bra target', we should use it. 2820 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), 2821 (CBranchOther Int1Regs:$a, bb:$target)>; 2822 2823 // Call 2824 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; 2825 def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, 2826 SDTCisVT<1, i32> ]>; 2827 2828 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, 2829 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; 2830 def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, 2831 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, 2832 SDNPSideEffect]>; 2833 2834 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; 2835 def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, 2836 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; 2837 def calltarget : Operand<i32>; 2838 let isCall=1 in { 2839 def CALL : NVPTXInst<(outs), (ins calltarget:$dst), 2840 "call \t$dst, (1);", []>; 2841 } 2842 2843 def : Pat<(call tglobaladdr:$dst), 2844 (CALL tglobaladdr:$dst)>; 2845 def : Pat<(call texternalsym:$dst), 2846 (CALL texternalsym:$dst)>; 2847 2848 // Pseudo instructions. 2849 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> 2850 : NVPTXInst<outs, ins, asmstr, pattern>; 2851 2852 // @TODO: We use some tricks here to emit curly braces. Can we clean this up 2853 // a bit without TableGen modifications? 2854 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt), 2855 "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}", 2856 [(callseq_start timm:$amt)]>; 2857 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), 2858 "\n\t//{{\n\t}}// Callseq End $amt1", 2859 [(callseq_end timm:$amt1, timm:$amt2)]>; 2860 2861 // trap instruction 2862 2863 def trapinst : NVPTXInst<(outs), (ins), 2864 "trap;", 2865 [(trap)]>; 2866 2867 include "NVPTXIntrinsics.td" 2868 2869 2870 //----------------------------------- 2871 // Notes 2872 //----------------------------------- 2873 // BSWAP is currently expanded. The following is a more efficient 2874 // - for < sm_20, use vector scalar mov, as tesla support native 16-bit register 2875 // - for sm_20, use pmpt (use vector scalar mov to get the pack and 2876 // unpack). sm_20 supports native 32-bit register, but not native 16-bit 2877 // register. 2878