; Exercises the llvm.nvvm.read.ptx.sreg.* intrinsics in two ways:
;  * llc (32-bit and 64-bit NVPTX, sm_20) must emit a mov from the matching
;    PTX special register followed by a ret;
;  * opt with -nvvm-intr-range must attach !range metadata to the intrinsic
;    call. The pass is run once with its default settings (prefix RANGE_20)
;    and once with -nvvm-intr-range-sm=30 (prefix RANGE_30); expectations that
;    hold for both runs use the shared RANGE prefix.
; The concrete bounds of every captured !range node are verified by the
; DAG checks at the end of the file.

; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
; RUN:   | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
; RUN:    -nvvm-intr-range -nvvm-intr-range-sm=30 \
; RUN:   | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s

; tid.x: first use of BLK_IDX_XY, so the metadata id is captured here.
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  ret i32 %x
}

; tid.y: must reuse the same !range node that was captured for tid.x.
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
  ret i32 %x
}

; tid.z: gets its own !range node (BLK_IDX_Z).
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
  ret i32 %x
}

; tid.w: only code generation is checked; this test has no !range
; expectation for the .w component.
define ptx_device i32 @test_tid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
  ret i32 %x
}

; ntid.x: first use of BLK_SIZE_XY, so the metadata id is captured here.
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  ret i32 %x
}

; ntid.y: must reuse the same !range node that was captured for ntid.x.
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
  ret i32 %x
}

; ntid.z: gets its own !range node (BLK_SIZE_Z).
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
  ret i32 %x
}

; ntid.w: only code generation is checked; no !range expectation.
define ptx_device i32 @test_ntid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
  ret i32 %x
}

define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
  ret i32 %x
}

; warpsize is expected to be emitted as the WARP_SZ symbol rather than as a
; %-prefixed special register.
define ptx_device i32 @test_warpsize() {
; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
  ret i32 %x
}

define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
  ret i32 %x
}

define ptx_device i32 @test_nwarpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
  ret i32 %x
}

; The ctaid tests run y, z, then x on purpose: GRID_IDX_YZ has to be captured
; (here) before the sm_20 expectation in test_ctaid_x can refer to it.
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  ret i32 %x
}

; ctaid.x depends on the SM version given to the range pass: with
; -nvvm-intr-range-sm=30 it gets a dedicated node (GRID_IDX_X), otherwise it
; shares the node already captured for ctaid.y/z.
define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
  ret i32 %x
}

; Same ordering trick as for ctaid: GRID_SIZE_YZ is captured here so that the
; sm_20 expectation in test_nctaid_x can refer to it.
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  ret i32 %x
}

define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  ret i32 %x
}

; nctaid.x: dedicated node (GRID_SIZE_X) with -nvvm-intr-range-sm=30,
; otherwise the node shared with nctaid.y/z.
define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  ret i32 %x
}

; A call that already carries !range metadata (!0) must keep it: the node
; captured as ALREADY is required to equal !0's bounds by the DAG checks at
; the end of the file.
define ptx_device i32 @test_already_has_range_md() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[ALREADY:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range !0
  ret i32 %x
}


define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
  ret i32 %x
}

; The remaining special registers are only checked for code generation; this
; test has no !range expectations for them.

define ptx_device i32 @test_smid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
  ret i32 %x
}

define ptx_device i32 @test_nsmid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
  ret i32 %x
}

define ptx_device i32 @test_gridid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
  ret i32 %x
}

; The lanemask intrinsics are spelled with a dot (lanemask.eq) while the
; expected PTX register name uses an underscore (%lanemask_eq).

define ptx_device i32 @test_lanemask_eq() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_le() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_lt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_ge() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_gt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
  ret i32 %x
}

define ptx_device i32 @test_clock() {
; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
  ret i32 %x
}

; clock64 is the only i64 intrinsic in this file, hence mov.u64 and an %rd
; register in the expected output.
define ptx_device i64 @test_clock64() {
; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
; CHECK: ret;
  %x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
  ret i64 %x
}

define ptx_device i32 @test_pm0() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
  ret i32 %x
}

define ptx_device i32 @test_pm1() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
  ret i32 %x
}

define ptx_device i32 @test_pm2() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
  ret i32 %x
}

define ptx_device i32 @test_pm3() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
  ret i32 %x
}

; Not a register read: the barrier intrinsic with a constant 0 argument must
; be emitted as "bar.sync 0".
define ptx_device void @test_bar_sync() {
; CHECK: bar.sync 0
; CHECK: ret;
  call void @llvm.nvvm.bar.sync(i32 0)
  ret void
}

; Declarations of every intrinsic called above, grouped by register family.

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()

declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()

declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()

declare i32 @llvm.nvvm.read.ptx.sreg.smid()
declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
declare i32 @llvm.nvvm.read.ptx.sreg.gridid()

declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()

declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()

declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
declare i32 @llvm.nvvm.read.ptx.sreg.pm3()

declare void @llvm.nvvm.bar.sync(i32 %i)

; Pre-existing range used by test_already_has_range_md.
!0 = !{i32 0, i32 19}

; Expected bounds for every !range node captured by the checks above. These
; are DAG checks because the order in which opt prints the metadata nodes is
; not what is being tested. Each captured node is listed exactly once.
; The two nodes that only exist in the -nvvm-intr-range-sm=30 run use the
; RANGE_30 prefix; the upper bound of GRID_SIZE_X is 2^31 as printed for an
; i32, i.e. -2147483648.
; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}