1 //===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 //----------------------------------- 11 // Vector Specific 12 //----------------------------------- 13 14 // 15 // All vector instructions derive from NVPTXVecInst 16 // 17 18 class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern, 19 NVPTXInst sInst=NOP> 20 : NVPTXInst<outs, ins, asmstr, pattern> { 21 NVPTXInst scalarInst=sInst; 22 } 23 24 let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in { 25 // Extract v2i16 26 def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), 27 (ins V2I16Regs:$src, i8imm:$c), 28 "mov.u16 \t$dst, $src${c:vecelem};", 29 [(set Int16Regs:$dst, (vector_extract 30 (v2i16 V2I16Regs:$src), imm:$c))], 31 IMOV16rr>; 32 33 // Extract v4i16 34 def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), 35 (ins V4I16Regs:$src, i8imm:$c), 36 "mov.u16 \t$dst, $src${c:vecelem};", 37 [(set Int16Regs:$dst, (vector_extract 38 (v4i16 V4I16Regs:$src), imm:$c))], 39 IMOV16rr>; 40 41 // Extract v2i8 42 def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), 43 (ins V2I8Regs:$src, i8imm:$c), 44 "mov.u16 \t$dst, $src${c:vecelem};", 45 [(set Int8Regs:$dst, (vector_extract 46 (v2i8 V2I8Regs:$src), imm:$c))], 47 IMOV8rr>; 48 49 // Extract v4i8 50 def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), 51 (ins V4I8Regs:$src, i8imm:$c), 52 "mov.u16 \t$dst, $src${c:vecelem};", 53 [(set Int8Regs:$dst, (vector_extract 54 (v4i8 V4I8Regs:$src), imm:$c))], 55 IMOV8rr>; 56 57 // Extract v2i32 58 def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), 59 (ins V2I32Regs:$src, i8imm:$c), 60 "mov.u32 \t$dst, $src${c:vecelem};", 61 [(set Int32Regs:$dst, (vector_extract 62 (v2i32 V2I32Regs:$src), imm:$c))], 63 IMOV32rr>; 64 65 // 
Extract v2f32 66 def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), 67 (ins V2F32Regs:$src, i8imm:$c), 68 "mov.f32 \t$dst, $src${c:vecelem};", 69 [(set Float32Regs:$dst, (vector_extract 70 (v2f32 V2F32Regs:$src), imm:$c))], 71 FMOV32rr>; 72 73 // Extract v2i64 74 def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), 75 (ins V2I64Regs:$src, i8imm:$c), 76 "mov.u64 \t$dst, $src${c:vecelem};", 77 [(set Int64Regs:$dst, (vector_extract 78 (v2i64 V2I64Regs:$src), imm:$c))], 79 IMOV64rr>; 80 81 // Extract v2f64 82 def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), 83 (ins V2F64Regs:$src, i8imm:$c), 84 "mov.f64 \t$dst, $src${c:vecelem};", 85 [(set Float64Regs:$dst, (vector_extract 86 (v2f64 V2F64Regs:$src), imm:$c))], 87 FMOV64rr>; 88 89 // Extract v4i32 90 def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), 91 (ins V4I32Regs:$src, i8imm:$c), 92 "mov.u32 \t$dst, $src${c:vecelem};", 93 [(set Int32Regs:$dst, (vector_extract 94 (v4i32 V4I32Regs:$src), imm:$c))], 95 IMOV32rr>; 96 97 // Extract v4f32 98 def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), 99 (ins V4F32Regs:$src, i8imm:$c), 100 "mov.f32 \t$dst, $src${c:vecelem};", 101 [(set Float32Regs:$dst, (vector_extract 102 (v4f32 V4F32Regs:$src), imm:$c))], 103 FMOV32rr>; 104 } 105 106 let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in { 107 // Insert v2i8 108 def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst), 109 (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c), 110 "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" 111 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 112 [(set V2I8Regs:$dst, 113 (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))], 114 IMOV8rr>; 115 116 // Insert v4i8 117 def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), 118 (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c), 119 "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" 120 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 121 [(set V4I8Regs:$dst, 122 (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))], 123 IMOV8rr>; 124 125 // Insert v2i16 126 
def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), 127 (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c), 128 "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" 129 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 130 [(set V2I16Regs:$dst, 131 (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))], 132 IMOV16rr>; 133 134 // Insert v4i16 135 def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), 136 (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c), 137 "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" 138 "\n\tmov.u16 \t$dst${c:vecelem}, $val;", 139 [(set V4I16Regs:$dst, 140 (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))], 141 IMOV16rr>; 142 143 // Insert v2i32 144 def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), 145 (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c), 146 "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};" 147 "\n\tmov.u32 \t$dst${c:vecelem}, $val;", 148 [(set V2I32Regs:$dst, 149 (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))], 150 IMOV32rr>; 151 152 // Insert v2f32 153 def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), 154 (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c), 155 "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};" 156 "\n\tmov.f32 \t$dst${c:vecelem}, $val;", 157 [(set V2F32Regs:$dst, 158 (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))], 159 FMOV32rr>; 160 161 // Insert v2i64 162 def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), 163 (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c), 164 "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};" 165 "\n\tmov.u64 \t$dst${c:vecelem}, $val;", 166 [(set V2I64Regs:$dst, 167 (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))], 168 IMOV64rr>; 169 170 // Insert v2f64 171 def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), 172 (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c), 173 "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};" 174 "\n\tmov.f64 \t$dst${c:vecelem}, $val;", 175 [(set V2F64Regs:$dst, 176 (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))], 177 FMOV64rr>; 178 179 // Insert 
v4i32 180 def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst), 181 (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c), 182 "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};" 183 "\n\tmov.u32 \t$dst${c:vecelem}, $val;", 184 [(set V4I32Regs:$dst, 185 (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))], 186 IMOV32rr>; 187 188 // Insert v4f32 189 def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), 190 (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c), 191 "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};" 192 "\n\tmov.f32 \t$dst${c:vecelem}, $val;", 193 [(set V4F32Regs:$dst, 194 (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))], 195 FMOV32rr>; 196 } 197 198 class BinOpAsmString<string c> { 199 string s = c; 200 } 201 202 class V4AsmStr<string opcode> : BinOpAsmString< 203 !strconcat(!strconcat(!strconcat(!strconcat( 204 !strconcat(!strconcat(!strconcat( 205 opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), 206 opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"), 207 opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"), 208 opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>; 209 210 class V2AsmStr<string opcode> : BinOpAsmString< 211 !strconcat(!strconcat(!strconcat( 212 opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), 213 opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>; 214 215 class V4MADStr<string opcode> : BinOpAsmString< 216 !strconcat(!strconcat(!strconcat(!strconcat( 217 !strconcat(!strconcat(!strconcat( 218 opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), 219 opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"), 220 opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"), 221 opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>; 222 223 class V2MADStr<string opcode> : BinOpAsmString< 224 !strconcat(!strconcat(!strconcat( 225 opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), 226 opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>; 227 228 class V4UnaryStr<string opcode> : BinOpAsmString< 229 !strconcat(!strconcat(!strconcat(!strconcat( 230 !strconcat(!strconcat(!strconcat( 231 opcode, " \t${dst}_0, 
${a}_0;\n\t"), 232 opcode), " \t${dst}_1, ${a}_1;\n\t"), 233 opcode), " \t${dst}_2, ${a}_2;\n\t"), 234 opcode), " \t${dst}_3, ${a}_3;")>; 235 236 class V2UnaryStr<string opcode> : BinOpAsmString< 237 !strconcat(!strconcat(!strconcat( 238 opcode, " \t${dst}_0, ${a}_0;\n\t"), 239 opcode), " \t${dst}_1, ${a}_1;")>; 240 241 class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass, 242 NVPTXInst sInst=NOP> : 243 NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b), 244 asmstr.s, 245 [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))], 246 sInst>; 247 248 class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1, 249 NVPTXRegClass regclass2, NVPTXInst sInst=NOP> : 250 NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b), 251 asmstr.s, 252 [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))], 253 sInst>; 254 255 class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass, 256 NVPTXInst sInst=NOP> : 257 NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a), 258 asmstr.s, 259 [(set regclass:$dst, (OpNode regclass:$a))], sInst>; 260 261 multiclass IntBinVOp<string asmstr, SDNode OpNode, 262 NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst 263 i16op=NOP, NVPTXInst i8op=NOP> { 264 def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs, 265 i64op>; 266 def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs, 267 i32op>; 268 def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs, 269 i32op>; 270 def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs, 271 i16op>; 272 def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs, 273 i16op>; 274 def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs, 275 i8op>; 276 def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs, 277 i8op>; 278 } 279 280 multiclass FloatBinVOp<string asmstr, 
SDNode OpNode, 281 NVPTXInst f64=NOP, NVPTXInst f32=NOP, 282 NVPTXInst f32_ftz=NOP> { 283 def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode, 284 V2F64Regs, f64>; 285 def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode, 286 V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>; 287 def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode, 288 V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>; 289 def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode, 290 V4F32Regs, f32>; 291 def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode, 292 V2F32Regs, f32>; 293 } 294 295 multiclass IntUnaryVOp<string asmstr, PatFrag OpNode, 296 NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, 297 NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> { 298 def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode, 299 V2I64Regs, i64op>; 300 def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode, 301 V4I32Regs, i32op>; 302 def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode, 303 V2I32Regs, i32op>; 304 def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, 305 V4I16Regs, i16op>; 306 def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, 307 V2I16Regs, i16op>; 308 def V4I8 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, 309 V4I8Regs, i8op>; 310 def V2I8 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, 311 V2I8Regs, i8op>; 312 } 313 314 315 // Integer Arithmetic 316 let VecInstType=isVecOther.Value in { 317 defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>; 318 defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>; 319 320 def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs, 321 ADDCCi32rr>; 322 def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs, 323 ADDCCi32rr>; 324 def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs, 325 SUBCCi32rr>; 326 def SubCCV2I32 : 
VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs, 327 SUBCCi32rr>; 328 def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs, 329 ADDCCCi32rr>; 330 def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs, 331 ADDCCCi32rr>; 332 def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs, 333 SUBCCCi32rr>; 334 def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs, 335 SUBCCCi32rr>; 336 337 def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs, 338 SHLi64rr>; 339 def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs, 340 SHLi32rr>; 341 def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs, 342 SHLi32rr>; 343 def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs, 344 SHLi16rr>; 345 def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs, 346 SHLi16rr>; 347 def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs, 348 SHLi8rr>; 349 def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs, 350 SHLi8rr>; 351 } 352 353 // cvt to v*i32, helpers for shift 354 class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr, 355 NVPTXInst sInst=NOP> : 356 NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>; 357 358 class VecCVTStrHelper<string op, string dest, string src> { 359 string s=!strconcat(op, !strconcat("\t", 360 !strconcat(dest, !strconcat(", ", !strconcat(src, ";"))))); 361 } 362 363 class Vec2CVTStr<string op> { 364 string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, 365 !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s)); 366 } 367 368 class Vec4CVTStr<string op> { 369 string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, 370 !strconcat("\n\t", 371 !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s, 372 !strconcat("\n\t", 373 !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s, 374 
!strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s)))))); 375 } 376 377 let VecInstType=isVecOther.Value in { 378 def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs, 379 Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>; 380 def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs, 381 Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>; 382 def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs, 383 Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>; 384 def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs, 385 Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>; 386 def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs, 387 Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>; 388 } 389 390 def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2), 391 (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; 392 def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2), 393 (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; 394 def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2), 395 (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; 396 397 def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2), 398 (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; 399 def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2), 400 (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; 401 402 let VecInstType=isVecOther.Value in { 403 def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs, 404 SRAi64rr>; 405 def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs, 406 SRAi32rr>; 407 def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs, 408 SRAi32rr>; 409 def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs, 410 SRAi16rr>; 411 def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs, 412 SRAi16rr>; 413 def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs, 414 SRAi8rr>; 415 def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, 
V4I8Regs, V4I32Regs, 416 SRAi8rr>; 417 418 def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs, 419 SRLi64rr>; 420 def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs, 421 SRLi32rr>; 422 def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs, 423 SRLi32rr>; 424 def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs, 425 SRLi16rr>; 426 def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs, 427 SRLi16rr>; 428 def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs, 429 SRLi8rr>; 430 def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs, 431 SRLi8rr>; 432 433 defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr, 434 MULTi8rr>; 435 defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr, 436 MULTHSi16rr, 437 MULTHSi8rr>; 438 defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr, 439 MULTHUi16rr, 440 MULTHUi8rr>; 441 defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr, 442 SDIVi8rr>; 443 defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr, 444 UDIVi8rr>; 445 defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr, 446 SREMi8rr>; 447 defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr, 448 UREMi8rr>; 449 } 450 451 def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2), 452 (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; 453 def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2), 454 (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; 455 def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2), 456 (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; 457 458 def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2), 459 (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; 460 def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2), 461 (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 
V4I8Regs:$src2))>; 462 463 def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2), 464 (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; 465 def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2), 466 (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; 467 def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2), 468 (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; 469 470 def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2), 471 (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; 472 def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2), 473 (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; 474 475 multiclass VMAD<string asmstr, NVPTXRegClass regclassv4, 476 NVPTXRegClass regclassv2, 477 SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP, 478 Predicate Pred> { 479 def V4 : NVPTXVecInst<(outs regclassv4:$dst), 480 (ins regclassv4:$a, regclassv4:$b, regclassv4:$c), 481 V4MADStr<asmstr>.s, 482 [(set regclassv4:$dst, 483 (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))], 484 sop>, 485 Requires<[Pred]>; 486 def V2 : NVPTXVecInst<(outs regclassv2:$dst), 487 (ins regclassv2:$a, regclassv2:$b, regclassv2:$c), 488 V2MADStr<asmstr>.s, 489 [(set regclassv2:$dst, 490 (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))], 491 sop>, 492 Requires<[Pred]>; 493 } 494 495 multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP, 496 Predicate Pred> { 497 def V2 : NVPTXVecInst<(outs regclass:$dst), 498 (ins regclass:$a, regclass:$b, regclass:$c), 499 V2MADStr<asmstr>.s, 500 [(set regclass:$dst, (add 501 (mul regclass:$a, regclass:$b), regclass:$c))], sop>, 502 Requires<[Pred]>; 503 } 504 multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP, 505 Predicate Pred> { 506 def V2 : NVPTXVecInst<(outs regclass:$dst), 507 (ins regclass:$a, regclass:$b, regclass:$c), 508 V2MADStr<asmstr>.s, 509 [(set regclass:$dst, (fadd 510 (fmul regclass:$a, regclass:$b), regclass:$c))], sop>, 511 
Requires<[Pred]>; 512 } 513 514 let VecInstType=isVecOther.Value in { 515 defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>; 516 defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr, 517 true>; 518 defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr, 519 true>; 520 defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>; 521 522 defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>; 523 524 defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>; 525 defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>; 526 defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>; 527 528 defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, 529 FMAD32_ftzrrr, doFMADF32_ftz>; 530 defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, 531 FMA32_ftzrrr, doFMAF32_ftz>; 532 defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr, 533 doFMADF32>; 534 defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr, 535 doFMAF32>; 536 defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>; 537 } 538 539 let VecInstType=isVecOther.Value in { 540 def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs, 541 FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; 542 def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs, 543 FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; 544 def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs, 545 FDIV32rr_prec>, Requires<[reqPTX20]>; 546 def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs, 547 FDIV32rr_prec>, Requires<[reqPTX20]>; 548 def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs, 549 FDIV32rr_ftz>, Requires<[doF32FTZ]>; 550 def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs, 
551 FDIV32rr_ftz>, Requires<[doF32FTZ]>; 552 def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>; 553 def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>; 554 def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>; 555 } 556 557 def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>; 558 559 let VecInstType=isVecOther.Value in { 560 def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs, 561 FNEGf32_ftz>, Requires<[doF32FTZ]>; 562 def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs, 563 FNEGf32_ftz>, Requires<[doF32FTZ]>; 564 def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>; 565 def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>; 566 def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>; 567 568 // Logical Arithmetic 569 defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>; 570 defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>; 571 defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>; 572 573 defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>; 574 } 575 576 577 multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 578 def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)), 579 (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>, 580 Requires<[Pred]>; 581 582 def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c), 583 (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>, 584 Requires<[Pred]>; 585 } 586 587 defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>; 588 defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>; 589 defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>; 590 defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>; 591 592 multiclass 
V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 593 def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)), 594 (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>, 595 Requires<[Pred]>; 596 597 def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c), 598 (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>, 599 Requires<[Pred]>; 600 } 601 602 defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>; 603 defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>; 604 defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>; 605 defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>; 606 607 multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { 608 def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)), 609 (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>, 610 Requires<[Pred]>; 611 612 def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c), 613 (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>, 614 Requires<[Pred]>; 615 } 616 617 defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>; 618 619 class VecModStr<string vecsize, string elem, string extra, string l=""> 620 { 621 string t1 = !strconcat("${c", elem); 622 string t2 = !strconcat(t1, ":vecv"); 623 string t3 = !strconcat(t2, vecsize); 624 string t4 = !strconcat(t3, extra); 625 string t5 = !strconcat(t4, l); 626 string s = !strconcat(t5, "}"); 627 } 628 class ShuffleOneLine<string vecsize, string elem, string type> 629 { 630 string t1 = VecModStr<vecsize, elem, "comm", "1">.s; 631 string t2 = !strconcat(t1, "mov."); 632 string t3 = !strconcat(t2, type); 633 string t4 = !strconcat(t3, " \t${dst}_"); 634 string t5 = !strconcat(t4, elem); 635 string t6 = !strconcat(t5, ", $src1"); 636 string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s); 637 string t8 = !strconcat(t7, ";\n\t"); 638 string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", 
"2">.s); 639 string t10 = !strconcat(t9, "mov."); 640 string t11 = !strconcat(t10, type); 641 string t12 = !strconcat(t11, " \t${dst}_"); 642 string t13 = !strconcat(t12, elem); 643 string t14 = !strconcat(t13, ", $src2"); 644 string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s); 645 string s = !strconcat(t15, ";"); 646 } 647 class ShuffleAsmStr2<string type> 648 { 649 string t1 = ShuffleOneLine<"2", "0", type>.s; 650 string t2 = !strconcat(t1, "\n\t"); 651 string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s); 652 } 653 class ShuffleAsmStr4<string type> 654 { 655 string t1 = ShuffleOneLine<"4", "0", type>.s; 656 string t2 = !strconcat(t1, "\n\t"); 657 string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s); 658 string t4 = !strconcat(t3, "\n\t"); 659 string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s); 660 string t6 = !strconcat(t5, "\n\t"); 661 string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s); 662 } 663 664 let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in { 665 def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst), 666 (ins V4F32Regs:$src1, V4F32Regs:$src2, 667 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), 668 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 669 ShuffleAsmStr4<"f32">.s), 670 [], FMOV32rr>; 671 672 def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst), 673 (ins V4I32Regs:$src1, V4I32Regs:$src2, 674 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), 675 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 676 ShuffleAsmStr4<"u32">.s), 677 [], IMOV32rr>; 678 679 def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst), 680 (ins V4I16Regs:$src1, V4I16Regs:$src2, 681 i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), 682 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 683 ShuffleAsmStr4<"u16">.s), 684 [], IMOV16rr>; 685 686 def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst), 687 (ins V4I8Regs:$src1, V4I8Regs:$src2, 688 i8imm:$c0, i8imm:$c1, i8imm:$c2, 
i8imm:$c3), 689 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", 690 ShuffleAsmStr4<"u16">.s), 691 [], IMOV8rr>; 692 693 def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst), 694 (ins V2F32Regs:$src1, V2F32Regs:$src2, 695 i8imm:$c0, i8imm:$c1), 696 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 697 ShuffleAsmStr2<"f32">.s), 698 [], FMOV32rr>; 699 700 def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst), 701 (ins V2I32Regs:$src1, V2I32Regs:$src2, 702 i8imm:$c0, i8imm:$c1), 703 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 704 ShuffleAsmStr2<"u32">.s), 705 [], IMOV32rr>; 706 707 def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst), 708 (ins V2I8Regs:$src1, V2I8Regs:$src2, 709 i8imm:$c0, i8imm:$c1), 710 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 711 ShuffleAsmStr2<"u16">.s), 712 [], IMOV8rr>; 713 714 def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst), 715 (ins V2I16Regs:$src1, V2I16Regs:$src2, 716 i8imm:$c0, i8imm:$c1), 717 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 718 ShuffleAsmStr2<"u16">.s), 719 [], IMOV16rr>; 720 721 def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst), 722 (ins V2F64Regs:$src1, V2F64Regs:$src2, 723 i8imm:$c0, i8imm:$c1), 724 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 725 ShuffleAsmStr2<"f64">.s), 726 [], FMOV64rr>; 727 728 def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst), 729 (ins V2I64Regs:$src1, V2I64Regs:$src2, 730 i8imm:$c0, i8imm:$c1), 731 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", 732 ShuffleAsmStr2<"u64">.s), 733 [], IMOV64rr>; 734 } 735 736 def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{ 737 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 738 return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32); 739 }]>; 740 def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{ 741 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 742 return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32); 743 }]>; 744 def 
ShuffleMask2 : SDNodeXForm<vector_shuffle, [{ 745 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 746 return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32); 747 }]>; 748 def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{ 749 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 750 return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32); 751 }]>; 752 753 // The spurious call is here to silence a compiler warning about N being 754 // unused. 755 def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs), 756 (vector_shuffle node:$lhs, node:$rhs), 757 [{ N->getGluedNode(); return true; }]>; 758 759 def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)), 760 (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2, 761 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 762 763 def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)), 764 (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2, 765 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), 766 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 767 768 def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)), 769 (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2, 770 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 771 772 def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)), 773 (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2, 774 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 775 776 def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)), 777 (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2, 778 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), 779 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 780 781 def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)), 782 (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2, 783 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 784 785 def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)), 786 (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2, 787 (ShuffleMask0 
node:$op), (ShuffleMask1 node:$op), 788 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 789 790 def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)), 791 (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2, 792 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 793 794 def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)), 795 (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2, 796 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), 797 (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; 798 799 def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)), 800 (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2, 801 (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; 802 803 class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, 804 NVPTXInst si> 805 : NVPTXVecInst<(outs vclass:$dst), 806 (ins sclass:$a1, sclass:$a2), 807 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"), 808 [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))], 809 si>; 810 class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, 811 NVPTXInst si> 812 : NVPTXVecInst<(outs vclass:$dst), 813 (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4), 814 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"), 815 [(set vclass:$dst, 816 (build_vector sclass:$a1, sclass:$a2, 817 sclass:$a3, sclass:$a4))], si>; 818 819 let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in { 820 def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, 821 FMOV32rr>; 822 def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, 823 FMOV64rr>; 824 825 def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, 826 IMOV32rr>; 827 def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, 828 IMOV64rr>; 829 def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, 830 IMOV16rr>; 831 def Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, 832 
IMOV8rr>;

def Build_Vector4_f32
  : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, FMOV32rr>;

def Build_Vector4_i32
  : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, IMOV32rr>;
def Build_Vector4_i16
  : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, IMOV16rr>;
def Build_Vector4_i8
  : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, IMOV8rr>;
}

// Whole-vector register move.  Carries no selection pattern (the pattern
// list is empty); sop is the scalar instruction recorded as scalarInst.
class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src),
      !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
      [],
      sop>;

let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
    VecInstType=isVecOther.Value in {
// 32-bit float elements
def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;

// 32-bit integer elements
def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;

// 16-bit integer elements
def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;

// 8-bit integer elements (u16 opcode text, as for the i16 moves)
def V4i8Mov  : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
def V2i8Mov  : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;

// 64-bit elements
def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
}

// Subvector extraction.  A 2-element half of a 4-element vector is rebuilt
// from two element extracts: index 0 takes elements 0-1, index 2 takes
// elements 2-3.
def extract_subvec
  : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;

def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
          (Build_Vector2_f32
             (V4f32Extract V4F32Regs:$src, 0),
             (V4f32Extract V4F32Regs:$src, 1))>;
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
          (Build_Vector2_f32
             (V4f32Extract V4F32Regs:$src, 2),
             (V4f32Extract V4F32Regs:$src, 3))>;
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
          (Build_Vector2_i32
             (V4i32Extract V4I32Regs:$src, 0),
             (V4i32Extract V4I32Regs:$src, 1))>;
def :
Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
          (Build_Vector2_i32
             (V4i32Extract V4I32Regs:$src, 2),
             (V4i32Extract V4I32Regs:$src, 3))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
          (Build_Vector2_i16
             (V4i16Extract V4I16Regs:$src, 0),
             (V4i16Extract V4I16Regs:$src, 1))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
          (Build_Vector2_i16
             (V4i16Extract V4I16Regs:$src, 2),
             (V4i16Extract V4I16Regs:$src, 3))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
          (Build_Vector2_i8
             (V4i8Extract V4I8Regs:$src, 0),
             (V4i8Extract V4I8Regs:$src, 1))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
          (Build_Vector2_i8
             (V4i8Extract V4I8Regs:$src, 2),
             (V4i8Extract V4I8Regs:$src, 3))>;

// Select instructions

// Assembles the text of one per-element select in field s:
//   selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;
// t1..t7 are the intermediate concatenation steps.
class Select_OneLine<string type, string pos> {
  string t1 = !strconcat("selp.", type);
  string t2 = !strconcat(t1, " \t${dst}_");
  string t3 = !strconcat(t2, pos);
  string t4 = !strconcat(t3, ", ${src1}_");
  string t5 = !strconcat(t4, pos);
  string t6 = !strconcat(t5, ", ${src2}_");
  string t7 = !strconcat(t6, pos);
  string s  = !strconcat(t7, ", $p;");
}

// Two-element select text: the Select_OneLine strings for positions "0" and
// "1", joined by "\n\t".  The result is in field s.
class Select_Str2<string type> {
  string t1 = Select_OneLine<type, "0">.s;
  string t2 = !strconcat(t1, "\n\t");
  string s  = !strconcat(t2, Select_OneLine<type, "1">.s);
}

// Four-element select text: positions "0" through "3", each pair joined by
// "\n\t".  The result is in field s.
class Select_Str4<string type> {
  string t1 = Select_OneLine<type, "0">.s;
  string t2 = !strconcat(t1, "\n\t");
  string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
  string t4 = !strconcat(t3, "\n\t");
  string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
  string t6 = !strconcat(t5, "\n\t");
  string s  = !strconcat(t6, Select_OneLine<type, "3">.s);
}

// Vector select on a single Int1Regs predicate $p; asmstr is used verbatim
// as the instruction text and sop is recorded as scalarInst.
class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
      asmstr,
      [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1,
vclass:$src2))],
      sop>;

// One select instruction per vector type.  The i8 vectors use the same "b16"
// type suffix as the i16 vectors.
let VecInstType=isVecOther.Value in {
def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
def V4I8_Select  : Vec_Select<V4I8Regs,  Select_Str4<"b16">.s, SELECTi8rr>;
def V2I8_Select  : Vec_Select<V2I8Regs,  Select_Str2<"b16">.s, SELECTi8rr>;

def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
}

// Comparison instructions

// setcc convenience fragments: each one is setcc with a fixed condition code.

// SETO* condition codes.
def vsetoeq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>;
def vsetogt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGT)>;
def vsetoge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGE)>;
def vsetolt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLT)>;
def vsetole
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLE)>;
def vsetone
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETONE)>;
def vseto
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETO)>;

// SETU* condition codes.
def vsetuo
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUO)>;
def vsetueq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUEQ)>;
def vsetugt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGT)>;
def vsetuge
  : PatFrag<(ops node:$lhs, node:$rhs),
            (setcc
node:$lhs, node:$rhs, SETUGE)>;
def vsetult
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULT)>;
def vsetule
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULE)>;
def vsetune
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUNE)>;

// Condition codes without an O/U prefix.
def vseteq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETEQ)>;
def vsetgt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGT)>;
def vsetge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGE)>;
def vsetlt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLT)>;
def vsetle
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLE)>;
def vsetne
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETNE)>;

// Vector comparison of two inrclass operands producing an outrclass result.
// The assembly string is the literal placeholder "Unsupported"; sop is the
// scalar instruction recorded as scalarInst.
// NOTE(review): presumably these are always replaced by sop before any
// assembly is printed -- confirm against the pass that consumes scalarInst.
class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
                  NVPTXInst sop>
  : NVPTXVecInst<
      (outs outrclass:$dst),
      (ins inrclass:$a, inrclass:$b),
      "Unsupported",
      [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
      sop>;

// Instantiates Vec_Compare for every integer vector type.  The result uses
// the same register class as the operands; inst8/16/32/64 supply the scalar
// instruction for each element width.
multiclass Vec_Compare_All<PatFrag op,
                           NVPTXInst inst8, NVPTXInst inst16,
                           NVPTXInst inst32, NVPTXInst inst64> {
  def V2I8  : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>;
  def V4I8  : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
}

let VecInstType=isVecOther.Value in {
defm VecSGT : Vec_Compare_All<vsetgt,
                              ISetSGTi8rr_toi8,   ISetSGTi16rr_toi16,
                              ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
defm VecUGT : Vec_Compare_All<vsetugt,
                              ISetUGTi8rr_toi8,
ISetUGTi16rr_toi16,
                              ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
defm VecSLT : Vec_Compare_All<vsetlt,
                              ISetSLTi8rr_toi8,   ISetSLTi16rr_toi16,
                              ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
defm VecULT : Vec_Compare_All<vsetult,
                              ISetULTi8rr_toi8,   ISetULTi16rr_toi16,
                              ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
defm VecSGE : Vec_Compare_All<vsetge,
                              ISetSGEi8rr_toi8,   ISetSGEi16rr_toi16,
                              ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
defm VecUGE : Vec_Compare_All<vsetuge,
                              ISetUGEi8rr_toi8,   ISetUGEi16rr_toi16,
                              ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
defm VecSLE : Vec_Compare_All<vsetle,
                              ISetSLEi8rr_toi8,   ISetSLEi16rr_toi16,
                              ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
defm VecULE : Vec_Compare_All<vsetule,
                              ISetULEi8rr_toi8,   ISetULEi16rr_toi16,
                              ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
defm VecSEQ : Vec_Compare_All<vseteq,
                              ISetSEQi8rr_toi8,   ISetSEQi16rr_toi16,
                              ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
defm VecUEQ : Vec_Compare_All<vsetueq,
                              ISetUEQi8rr_toi8,   ISetUEQi16rr_toi16,
                              ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
defm VecSNE : Vec_Compare_All<vsetne,
                              ISetSNEi8rr_toi8,   ISetSNEi16rr_toi16,
                              ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
defm VecUNE : Vec_Compare_All<vsetune,
                              ISetUNEi8rr_toi8,   ISetUNEi16rr_toi16,
                              ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
}

// Instantiates Vec_Compare for the floating-point vector types.  The result
// is an integer vector with the same element count and width as the
// floating-point operands (v2f32 -> V2I32Regs, v4f32 -> V4I32Regs,
// v2f64 -> V2I64Regs).
multiclass FVec_Compare_All<PatFrag op, NVPTXInst instf32, NVPTXInst instf64> {
  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
}

let VecInstType=isVecOther.Value in {
// Comparisons built on the SETO* fragments.
defm FVecGT : FVec_Compare_All<vsetogt,
                               FSetGTf32rr_toi32, FSetGTf64rr_toi64>;
defm FVecLT : FVec_Compare_All<vsetolt,
                               FSetLTf32rr_toi32, FSetLTf64rr_toi64>;
defm FVecGE : FVec_Compare_All<vsetoge,
                               FSetGEf32rr_toi32, FSetGEf64rr_toi64>;
defm FVecLE :
FVec_Compare_All<vsetole,
                               FSetLEf32rr_toi32, FSetLEf64rr_toi64>;
defm FVecEQ : FVec_Compare_All<vsetoeq,
                               FSetEQf32rr_toi32, FSetEQf64rr_toi64>;
defm FVecNE : FVec_Compare_All<vsetone,
                               FSetNEf32rr_toi32, FSetNEf64rr_toi64>;

// Comparisons built on the SETU* fragments.
defm FVecUGT : FVec_Compare_All<vsetugt,
                                FSetUGTf32rr_toi32, FSetUGTf64rr_toi64>;
defm FVecULT : FVec_Compare_All<vsetult,
                                FSetULTf32rr_toi32, FSetULTf64rr_toi64>;
defm FVecUGE : FVec_Compare_All<vsetuge,
                                FSetUGEf32rr_toi32, FSetUGEf64rr_toi64>;
defm FVecULE : FVec_Compare_All<vsetule,
                                FSetULEf32rr_toi32, FSetULEf64rr_toi64>;
defm FVecUEQ : FVec_Compare_All<vsetueq,
                                FSetUEQf32rr_toi32, FSetUEQf64rr_toi64>;
defm FVecUNE : FVec_Compare_All<vsetune,
                                FSetUNEf32rr_toi32, FSetUNEf64rr_toi64>;

// SETO / SETUO tests.
defm FVecNUM : FVec_Compare_All<vseto,
                                FSetNUMf32rr_toi32, FSetNUMf64rr_toi64>;
defm FVecNAN : FVec_Compare_All<vsetuo,
                                FSetNANf32rr_toi32, FSetNANf64rr_toi64>;
}

// Four-register ld.param from [retval0+$b].  opstr is appended to
// "ld.param" (for example ".v4.b32").  Operand $a is accepted but does not
// appear in the assembly string.
class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
      (ins i32imm:$a, i32imm:$b),
      !strconcat(!strconcat("ld.param", opstr),
                 "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"),
      []>;

// Two-register form of LoadParamScalar4Inst.
class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs regclass:$d1, regclass:$d2),
      (ins i32imm:$a, i32imm:$b),
      !strconcat(!strconcat("ld.param", opstr),
                 "\t{{$d1, $d2}}, [retval0+$b];"),
      []>;

// Four-register st.param to [param$a+$b]; opstr is appended to "st.param".
class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
           i32imm:$a, i32imm:$b),
      !strconcat(!strconcat("st.param", opstr),
                 "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"),
      []>;

// Two-register form of StoreParamScalar4Inst.
class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, i32imm:$a,
i32imm:$b),
      !strconcat(!strconcat("st.param", opstr),
                 "\t[param$a+$b], {{$s1, $s2}};"),
      []>;

// Four-register st.param to [func_retval+$a]; opstr is appended to
// "st.param".
class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
           i32imm:$a),
      !strconcat(!strconcat("st.param", opstr),
                 "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"),
      []>;

// Two-register form of StoreRetvalScalar4Inst.
class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, i32imm:$a),
      !strconcat(!strconcat("st.param", opstr),
                 "\t[func_retval+$a], {{$s1, $s2}};"),
      []>;

// Scalarized parameter loads.  The register class of every definition
// matches the element width named by its opstr suffix.
def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs,  ".v4.b8">;

// The I64, I16 and I8 two-element loads used to name Int32Regs, which
// disagreed with both the four-element loads above and the two-element
// stores below.  They now use the register class of their element type.
def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs,  ".v2.b8">;

def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;

// Scalarized parameter stores.
def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
def StoreParamScalar4I8  : StoreParamScalar4Inst<Int8Regs,  ".v4.b8">;

def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
def StoreParamScalar2I8  : StoreParamScalar2Inst<Int8Regs,  ".v2.b8">;

def StoreParamScalar4F32 :
StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;

// Scalarized return-value stores.
def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
def StoreRetvalScalar4I8  : StoreRetvalScalar4Inst<Int8Regs,  ".v4.b8">;

def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
def StoreRetvalScalar2I8  : StoreRetvalScalar2Inst<Int8Regs,  ".v2.b8">;

def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;

// Vector-typed LoadParam.  The assembly string is descriptive text only and
// opstr is not used in it; sop names the scalarized instruction recorded as
// scalarInst.
class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs regclass:$dst),
      (ins i32imm:$a, i32imm:$b),
      "loadparam : $dst <- [$a, $b]",
      [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
      sop>;

// Vector-typed StoreParam; same conventions as LoadParamVecInst.
class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a, i32imm:$b),
      "storeparam : [$a, $b] <- $val",
      [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)],
      sop>;

// Vector-typed StoreRetval; same conventions as LoadParamVecInst.
class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
                         NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a),
      "storeretval : retval[$a] <- $val",
      [(StoreRetval (i32 imm:$a), regclass:$val)],
      sop>;

// Vector parameter loads, each paired with its LoadParamScalar* instruction.
let VecInstType=isVecLD.Value in {
def LoadParamV4I32
  : LoadParamVecInst<V4I32Regs, ".v4.b32", LoadParamScalar4I32>;
def LoadParamV4I16
  : LoadParamVecInst<V4I16Regs, ".v4.b16", LoadParamScalar4I16>;
def LoadParamV4I8
  : LoadParamVecInst<V4I8Regs, ".v4.b8", LoadParamScalar4I8>;

def LoadParamV2I64
  : LoadParamVecInst<V2I64Regs, ".v2.b64", LoadParamScalar2I64>;
def LoadParamV2I32
  : LoadParamVecInst<V2I32Regs, ".v2.b32", LoadParamScalar2I32>;
def LoadParamV2I16
  : LoadParamVecInst<V2I16Regs, ".v2.b16", LoadParamScalar2I16>;
def LoadParamV2I8
  : LoadParamVecInst<V2I8Regs, ".v2.b8", LoadParamScalar2I8>;

def LoadParamV4F32
  : LoadParamVecInst<V4F32Regs, ".v4.f32", LoadParamScalar4F32>;
def LoadParamV2F32
  : LoadParamVecInst<V2F32Regs, ".v2.f32", LoadParamScalar2F32>;
def LoadParamV2F64
  : LoadParamVecInst<V2F64Regs, ".v2.f64", LoadParamScalar2F64>;
}

// Vector parameter and return-value stores, each paired with its
// Store*Scalar* instruction.
let VecInstType=isVecST.Value in {
def StoreParamV4I32
  : StoreParamVecInst<V4I32Regs, ".v4.b32", StoreParamScalar4I32>;
def StoreParamV4I16
  : StoreParamVecInst<V4I16Regs, ".v4.b16", StoreParamScalar4I16>;
def StoreParamV4I8
  : StoreParamVecInst<V4I8Regs, ".v4.b8", StoreParamScalar4I8>;

def StoreParamV2I64
  : StoreParamVecInst<V2I64Regs, ".v2.b64", StoreParamScalar2I64>;
def StoreParamV2I32
  : StoreParamVecInst<V2I32Regs, ".v2.b32", StoreParamScalar2I32>;
def StoreParamV2I16
  : StoreParamVecInst<V2I16Regs, ".v2.b16", StoreParamScalar2I16>;
def StoreParamV2I8
  : StoreParamVecInst<V2I8Regs, ".v2.b8", StoreParamScalar2I8>;

def StoreParamV4F32
  : StoreParamVecInst<V4F32Regs, ".v4.f32", StoreParamScalar4F32>;
def StoreParamV2F32
  : StoreParamVecInst<V2F32Regs, ".v2.f32", StoreParamScalar2F32>;
def StoreParamV2F64
  : StoreParamVecInst<V2F64Regs, ".v2.f64", StoreParamScalar2F64>;

def StoreRetvalV4I32
  : StoreRetvalVecInst<V4I32Regs, ".v4.b32", StoreRetvalScalar4I32>;
def StoreRetvalV4I16
  : StoreRetvalVecInst<V4I16Regs, ".v4.b16", StoreRetvalScalar4I16>;
def
StoreRetvalV4I8
  : StoreRetvalVecInst<V4I8Regs, ".v4.b8", StoreRetvalScalar4I8>;

def StoreRetvalV2I64
  : StoreRetvalVecInst<V2I64Regs, ".v2.b64", StoreRetvalScalar2I64>;
def StoreRetvalV2I32
  : StoreRetvalVecInst<V2I32Regs, ".v2.b32", StoreRetvalScalar2I32>;
def StoreRetvalV2I16
  : StoreRetvalVecInst<V2I16Regs, ".v2.b16", StoreRetvalScalar2I16>;
def StoreRetvalV2I8
  : StoreRetvalVecInst<V2I8Regs, ".v2.b8", StoreRetvalScalar2I8>;

def StoreRetvalV4F32
  : StoreRetvalVecInst<V4F32Regs, ".v4.f32", StoreRetvalScalar4F32>;
def StoreRetvalV2F32
  : StoreRetvalVecInst<V2F32Regs, ".v2.f32", StoreRetvalScalar2F32>;
def StoreRetvalV2F64
  : StoreRetvalVecInst<V2F64Regs, ".v2.f64", StoreRetvalScalar2F64>;

}


// Int vector to int scalar bit convert: every element is extracted and the
// pieces are handed to the matching V<n>I<w>toI<m> instruction.

// v4i8 -> i32
def : Pat<(i32 (bitconvert V4I8Regs:$s)),
          (V4I8toI32
             (V4i8Extract V4I8Regs:$s,0),
             (V4i8Extract V4I8Regs:$s,1),
             (V4i8Extract V4I8Regs:$s,2),
             (V4i8Extract V4I8Regs:$s,3))>;
// v4i16 -> i64
def : Pat<(i64 (bitconvert V4I16Regs:$s)),
          (V4I16toI64
             (V4i16Extract V4I16Regs:$s,0),
             (V4i16Extract V4I16Regs:$s,1),
             (V4i16Extract V4I16Regs:$s,2),
             (V4i16Extract V4I16Regs:$s,3))>;
// v2i8 -> i16
def : Pat<(i16 (bitconvert V2I8Regs:$s)),
          (V2I8toI16
             (V2i8Extract V2I8Regs:$s,0),
             (V2i8Extract V2I8Regs:$s,1))>;
// v2i16 -> i32
def : Pat<(i32 (bitconvert V2I16Regs:$s)),
          (V2I16toI32
             (V2i16Extract V2I16Regs:$s,0),
             (V2i16Extract V2I16Regs:$s,1))>;
// v2i32 -> i64
def : Pat<(i64 (bitconvert V2I32Regs:$s)),
          (V2I32toI64
             (V2i32Extract V2I32Regs:$s,0),
             (V2i32Extract V2I32Regs:$s,1))>;

// Int scalar to int vector bit convert.  The assembly string of each
// instruction in this group is the literal placeholder "Error!".
let VecInstType=isVecDest.Value in {
// i32 -> v4i8
def VecI32toV4I8
  : NVPTXVecInst<
      (outs V4I8Regs:$d),
      (ins Int32Regs:$s),
      "Error!",
      [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
I32toV4I8>;
// i64 -> v4i16
def VecI64toV4I16
  : NVPTXVecInst<
      (outs V4I16Regs:$d),
      (ins Int64Regs:$s),
      "Error!",
      [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
      I64toV4I16>;
// i16 -> v2i8
def VecI16toV2I8
  : NVPTXVecInst<
      (outs V2I8Regs:$d),
      (ins Int16Regs:$s),
      "Error!",
      [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
      I16toV2I8>;
// i32 -> v2i16
def VecI32toV2I16
  : NVPTXVecInst<
      (outs V2I16Regs:$d),
      (ins Int32Regs:$s),
      "Error!",
      [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
      I32toV2I16>;
// i64 -> v2i32
def VecI64toV2I32
  : NVPTXVecInst<
      (outs V2I32Regs:$d),
      (ins Int64Regs:$s),
      "Error!",
      [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
      I64toV2I32>;
}

// Int vector to int vector bit convert: the source is packed into a scalar
// (or split per element) and then unpacked into the destination layout.

// v4i8 -> v2i16
def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
          (VecI32toV2I16
             (V4I8toI32
                (V4i8Extract V4I8Regs:$s,0),
                (V4i8Extract V4I8Regs:$s,1),
                (V4i8Extract V4I8Regs:$s,2),
                (V4i8Extract V4I8Regs:$s,3)))>;
// v4i16 -> v2i32
def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
          (VecI64toV2I32
             (V4I16toI64
                (V4i16Extract V4I16Regs:$s,0),
                (V4i16Extract V4I16Regs:$s,1),
                (V4i16Extract V4I16Regs:$s,2),
                (V4i16Extract V4I16Regs:$s,3)))>;
// v2i16 -> v4i8
def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
          (VecI32toV4I8
             (V2I16toI32
                (V2i16Extract V2I16Regs:$s,0),
                (V2i16Extract V2I16Regs:$s,1)))>;
// v2i32 -> v4i16
def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
          (VecI64toV4I16
             (V2I32toI64
                (V2i32Extract V2I32Regs:$s,0),
                (V2i32Extract V2I32Regs:$s,1)))>;
// v2i64 -> v4i32
def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_i32
             (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
             (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
             (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
             (V2i32Extract (VecI64toV2I32 (V2i64Extract
V2I64Regs:$s, 1)), 1))>;
// v4i32 -> v2i64
def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_i64
             (V2I32toI64
                (V4i32Extract V4I32Regs:$s,0),
                (V4i32Extract V4I32Regs:$s,1)),
             (V2I32toI64
                (V4i32Extract V4I32Regs:$s,2),
                (V4i32Extract V4I32Regs:$s,3)))>;

// Fp scalar to fp vector convert
// f64 -> v2f32 (assembly string is the literal placeholder "Error!")
let VecInstType=isVecDest.Value in {
def VecF64toV2F32
  : NVPTXVecInst<
      (outs V2F32Regs:$d),
      (ins Float64Regs:$s),
      "Error!",
      [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
      F64toV2F32>;
}

// Fp vector to fp scalar convert
// v2f32 -> f64
def : Pat<(f64 (bitconvert V2F32Regs:$s)),
          (V2F32toF64
             (V2f32Extract V2F32Regs:$s,0),
             (V2f32Extract V2F32Regs:$s,1))>;

// Fp scalar to int vector convert: the scalar is first bit-converted to the
// integer of the same width, then unpacked.

// f32 -> v4i8
def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
          (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
// f32 -> v2i16
def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
          (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
// f64 -> v4i16
def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
          (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
// f64 -> v2i32
def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
          (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;

// Int vector to fp scalar convert: the elements are packed into an integer
// scalar, which is then bit-converted to floating point.

// v4i8 -> f32
def : Pat<(f32 (bitconvert V4I8Regs:$s)),
          (BITCONVERT_32_I2F
             (V4I8toI32
                (V4i8Extract V4I8Regs:$s,0),
                (V4i8Extract V4I8Regs:$s,1),
                (V4i8Extract V4I8Regs:$s,2),
                (V4i8Extract V4I8Regs:$s,3)))>;
// v4i16 -> f64
def : Pat<(f64 (bitconvert V4I16Regs:$s)),
          (BITCONVERT_64_I2F
             (V4I16toI64
                (V4i16Extract V4I16Regs:$s,0),
                (V4i16Extract V4I16Regs:$s,1),
                (V4i16Extract V4I16Regs:$s,2),
                (V4i16Extract V4I16Regs:$s,3)))>;
// v2i16 -> f32
def : Pat<(f32 (bitconvert V2I16Regs:$s)),
          (BITCONVERT_32_I2F
             (V2I16toI32
                (V2i16Extract V2I16Regs:$s,0),
(V2i16Extract V2I16Regs:$s,1)))>;
// v2i32 -> f64
def : Pat<(f64 (bitconvert V2I32Regs:$s)),
          (BITCONVERT_64_I2F
             (V2I32toI64
                (V2i32Extract V2I32Regs:$s,0),
                (V2i32Extract V2I32Regs:$s,1)))>;

// Int scalar to fp vector convert
// i64 -> v2f32
def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
          (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;

// Fp vector to int scalar convert
// v2f32 -> i64
def : Pat<(i64 (bitconvert V2F32Regs:$s)),
          (BITCONVERT_64_F2I
             (V2F32toF64
                (V2f32Extract V2F32Regs:$s,0),
                (V2f32Extract V2F32Regs:$s,1)))>;

// Int vector to fp vector convert

// v2i64 -> v4f32: each i64 element is split into two i32 halves, and every
// half is bit-converted to f32.
def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_f32
             (BITCONVERT_32_I2F
                (V2i32Extract
                   (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0)),
             (BITCONVERT_32_I2F
                (V2i32Extract
                   (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1)),
             (BITCONVERT_32_I2F
                (V2i32Extract
                   (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0)),
             (BITCONVERT_32_I2F
                (V2i32Extract
                   (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
// v2i64 -> v2f64
def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
          (Build_Vector2_f64
             (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
             (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
// v2i32 -> v2f32
def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
          (Build_Vector2_f32
             (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
             (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
// v4i32 -> v2f64: adjacent i32 pairs are packed into an i64, which is then
// bit-converted to f64.
def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_f64
             (BITCONVERT_64_I2F
                (V2I32toI64
                   (V4i32Extract V4I32Regs:$s,0),
                   (V4i32Extract V4I32Regs:$s,1))),
             (BITCONVERT_64_I2F
                (V2I32toI64
                   (V4i32Extract V4I32Regs:$s,2),
                   (V4i32Extract V4I32Regs:$s,3))))>;
// v4i32 -> v4f32
def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
          (Build_Vector4_f32
             (BITCONVERT_32_I2F (V4i32Extract
V4I32Regs:$s,0)),
             (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
             (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
             (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
// v4i16 -> v2f32
def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
          (VecF64toV2F32
             (BITCONVERT_64_I2F
                (V4I16toI64
                   (V4i16Extract V4I16Regs:$s,0),
                   (V4i16Extract V4I16Regs:$s,1),
                   (V4i16Extract V4I16Regs:$s,2),
                   (V4i16Extract V4I16Regs:$s,3))))>;

// Fp vector to int vector convert

// v2i64 <- v4f32: adjacent f32 pairs are packed into an f64, which is then
// bit-converted to i64.
def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
          (Build_Vector2_i64
             (BITCONVERT_64_F2I
                (V2F32toF64
                   (V4f32Extract V4F32Regs:$s,0),
                   (V4f32Extract V4F32Regs:$s,1))),
             (BITCONVERT_64_F2I
                (V2F32toF64
                   (V4f32Extract V4F32Regs:$s,2),
                   (V4f32Extract V4F32Regs:$s,3))))>;
// v2i64 <- v2f64
def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
          (Build_Vector2_i64
             (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
             (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
// v2i32 <- v2f32
def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
          (Build_Vector2_i32
             (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
             (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
// v4i32 <- v2f64: each f64 element is split into two f32 halves, and every
// half is bit-converted to i32.
def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
          (Build_Vector4_i32
             (BITCONVERT_32_F2I
                (V2f32Extract
                   (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)), 0)),
             (BITCONVERT_32_F2I
                (V2f32Extract
                   (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)), 1)),
             (BITCONVERT_32_F2I
                (V2f32Extract
                   (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)), 0)),
             (BITCONVERT_32_F2I
                (V2f32Extract
                   (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
// v4i32 <- v4f32
def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
          (Build_Vector4_i32
             (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
             (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
             (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
(BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
// v4i16 <- v2f32: the two f32 elements are packed into an f64, bit-converted
// to i64, and unpacked into four i16 elements.
def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
          (VecI64toV4I16
             (BITCONVERT_64_F2I
                (V2F32toF64
                   (V2f32Extract V2F32Regs:$s,0),
                   (V2f32Extract V2F32Regs:$s,1))))>;