Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
      2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
      3 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
      4 ; RUN:   | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
      5 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
      6 ; RUN:    -nvvm-intr-range -nvvm-intr-range-sm=30 \
      7 ; RUN:   | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
      8 
      9 define ptx_device i32 @test_tid_x() {
        ; Reads %tid.x through its intrinsic and returns the value untouched.
        ; The range pass is expected to attach the same metadata node as tid.y.
     10 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
     11 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
     12 ; CHECK: ret;
     13   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
     14   ret i32 %tid
     15 }
     16 
     17 define ptx_device i32 @test_tid_y() {
        ; Reads %tid.y and returns it; the range metadata must be the node
        ; already captured for tid.x.
     18 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
     19 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
     20 ; CHECK: ret;
     21   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
     22   ret i32 %tid
     23 }
     24 
     25 define ptx_device i32 @test_tid_z() {
        ; Reads %tid.z and returns it; the z dimension captures its own
        ; range metadata node.
     26 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
     27 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
     28 ; CHECK: ret;
     29   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
     30   ret i32 %tid
     31 }
     32 
     33 define ptx_device i32 @test_tid_w() {
        ; Reads %tid.w and returns it; only the emitted PTX is verified here,
        ; no range metadata expectation is attached.
     34 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
     35 ; CHECK: ret;
     36   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
     37   ret i32 %tid
     38 }
     39 
     40 define ptx_device i32 @test_ntid_x() {
        ; Reads %ntid.x and returns it; the range metadata node captured here
        ; is reused by the ntid.y test.
     41 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
     42 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
     43 ; CHECK: ret;
     44   %ntid = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
     45   ret i32 %ntid
     46 }
     47 
     48 define ptx_device i32 @test_ntid_y() {
        ; Reads %ntid.y and returns it; must carry the metadata node that the
        ; ntid.x test captured.
     49 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
     50 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
     51 ; CHECK: ret;
     52   %ntid = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
     53   ret i32 %ntid
     54 }
     55 
     56 define ptx_device i32 @test_ntid_z() {
        ; Reads %ntid.z and returns it; the z dimension captures a separate
        ; range metadata node.
     57 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
     58 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
     59 ; CHECK: ret;
     60   %ntid = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
     61   ret i32 %ntid
     62 }
     63 
     64 define ptx_device i32 @test_ntid_w() {
        ; Reads %ntid.w and returns it; only the emitted PTX is verified.
     65 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
     66 ; CHECK: ret;
     67   %ntid = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
     68   ret i32 %ntid
     69 }
     70 
     71 define ptx_device i32 @test_laneid() {
        ; Reads %laneid and returns it; the range pass is expected to annotate
        ; the call with its own metadata node.
     72 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
     73 ; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
     74 ; CHECK: ret;
     75   %lane = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
     76   ret i32 %lane
     77 }
     78 
     79 define ptx_device i32 @test_warpsize() {
        ; Reads the warp size through its intrinsic; the PTX is expected to
        ; use the WARP_SZ symbol rather than a %-prefixed register name.
     80 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
     81 ; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
     82 ; CHECK: ret;
     83   %wsize = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
     84   ret i32 %wsize
     85 }
     86 
     87 define ptx_device i32 @test_warpid() {
        ; Reads %warpid and returns it; only the emitted PTX is verified.
     88 ; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
     89 ; CHECK: ret;
     90   %warp = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
     91   ret i32 %warp
     92 }
     93 
     94 define ptx_device i32 @test_nwarpid() {
        ; Reads %nwarpid and returns it; only the emitted PTX is verified.
     95 ; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
     96 ; CHECK: ret;
     97   %nwarps = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
     98   ret i32 %nwarps
     99 }
    100 
    101 define ptx_device i32 @test_ctaid_y() {
        ; Reads %ctaid.y and returns it; the metadata node captured here is
        ; reused by the ctaid.z test and, in the default range run, by ctaid.x.
    102 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
    103 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
    104 ; CHECK: ret;
    105   %cta = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
    106   ret i32 %cta
    107 }
    108 
    109 define ptx_device i32 @test_ctaid_z() {
        ; Reads %ctaid.z and returns it; must carry the metadata node that the
        ; ctaid.y test captured.
    110 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
    111 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
    112 ; CHECK: ret;
    113   %cta = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
    114   ret i32 %cta
    115 }
    116 
    117 define ptx_device i32 @test_ctaid_x() {
        ; Reads %ctaid.x and returns it. With -nvvm-intr-range-sm=30 the call
        ; gets a metadata node of its own; in the default range run it must
        ; reuse the node captured for ctaid.y/z.
    118 ; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
    119 ; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
    120 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
    121 ; CHECK: ret;
    122   %cta = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
    123   ret i32 %cta
    124 }
    125 
    126 define ptx_device i32 @test_ctaid_w() {
        ; Reads %ctaid.w and returns it; only the emitted PTX is verified.
    127 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
    128 ; CHECK: ret;
    129   %cta = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
    130   ret i32 %cta
    131 }
    132 
    133 define ptx_device i32 @test_nctaid_y() {
        ; Reads %nctaid.y and returns it; the metadata node captured here is
        ; reused by the nctaid.z test and, in the default range run, by nctaid.x.
    134 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
    135 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
    136 ; CHECK: ret;
    137   %ncta = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
    138   ret i32 %ncta
    139 }
    140 
    141 define ptx_device i32 @test_nctaid_z() {
        ; Reads %nctaid.z and returns it; must carry the metadata node that
        ; the nctaid.y test captured.
    142 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
    143 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
    144 ; CHECK: ret;
    145   %ncta = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
    146   ret i32 %ncta
    147 }
    148 
    149 define ptx_device i32 @test_nctaid_x() {
        ; Reads %nctaid.x and returns it. With -nvvm-intr-range-sm=30 the call
        ; gets a metadata node of its own; in the default range run it must
        ; reuse the node captured for nctaid.y/z.
    150 ; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
    151 ; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
    152 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
    153 ; CHECK: ret;
    154   %ncta = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
    155   ret i32 %ncta
    156 }
    157 
    158 define ptx_device i32 @test_already_has_range_md() {
        ; The call below already carries !range !0. The range run expects the
        ; captured node to still hold the bounds of !0, i.e. the pass must not
        ; replace an annotation that is already present.
    159 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
    160 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[ALREADY:[0-9]+]]
        ; Close the function the same way every sibling test does.
        ; CHECK: ret;
    161   %ncta = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range !0
    162   ret i32 %ncta
    163 }
    164 
    165 
    166 define ptx_device i32 @test_nctaid_w() {
        ; Reads %nctaid.w and returns it; only the emitted PTX is verified.
    167 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
    168 ; CHECK: ret;
    169   %ncta = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
    170   ret i32 %ncta
    171 }
    172 
    173 define ptx_device i32 @test_smid() {
        ; Reads %smid and returns it; only the emitted PTX is verified.
    174 ; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
    175 ; CHECK: ret;
    176   %sm = call i32 @llvm.nvvm.read.ptx.sreg.smid()
    177   ret i32 %sm
    178 }
    179 
    180 define ptx_device i32 @test_nsmid() {
        ; Reads %nsmid and returns it; only the emitted PTX is verified.
    181 ; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
    182 ; CHECK: ret;
    183   %nsm = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
    184   ret i32 %nsm
    185 }
    186 
    187 define ptx_device i32 @test_gridid() {
        ; Reads %gridid and returns it; only the emitted PTX is verified.
    188 ; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
    189 ; CHECK: ret;
    190   %grid = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
    191   ret i32 %grid
    192 }
    193 
    194 define ptx_device i32 @test_lanemask_eq() {
        ; Reads %lanemask_eq and returns it. Note the intrinsic spells the name
        ; with a dot while the PTX register uses an underscore.
    195 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
    196 ; CHECK: ret;
    197   %mask = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
    198   ret i32 %mask
    199 }
    200 
    201 define ptx_device i32 @test_lanemask_le() {
        ; Reads %lanemask_le and returns it; only the emitted PTX is verified.
    202 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
    203 ; CHECK: ret;
    204   %mask = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
    205   ret i32 %mask
    206 }
    207 
    208 define ptx_device i32 @test_lanemask_lt() {
        ; Reads %lanemask_lt and returns it; only the emitted PTX is verified.
    209 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
    210 ; CHECK: ret;
    211   %mask = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
    212   ret i32 %mask
    213 }
    214 
    215 define ptx_device i32 @test_lanemask_ge() {
        ; Reads %lanemask_ge and returns it; only the emitted PTX is verified.
    216 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
    217 ; CHECK: ret;
    218   %mask = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
    219   ret i32 %mask
    220 }
    221 
    222 define ptx_device i32 @test_lanemask_gt() {
        ; Reads %lanemask_gt and returns it; only the emitted PTX is verified.
    223 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
    224 ; CHECK: ret;
    225   %mask = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
    226   ret i32 %mask
    227 }
    228 
    229 define ptx_device i32 @test_clock() {
        ; Reads the 32-bit %clock register and returns it.
    230 ; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
    231 ; CHECK: ret;
    232   %ticks = call i32 @llvm.nvvm.read.ptx.sreg.clock()
    233   ret i32 %ticks
    234 }
    235 
    236 define ptx_device i64 @test_clock64() {
        ; Reads the 64-bit %clock64 register; unlike the other tests the move
        ; is expected to be a mov.u64 into a %rd register and the result is i64.
    237 ; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
    238 ; CHECK: ret;
    239   %ticks = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
    240   ret i64 %ticks
    241 }
    242 
    243 define ptx_device i32 @test_pm0() {
        ; Reads %pm0 and returns it; only the emitted PTX is verified.
    244 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
    245 ; CHECK: ret;
    246   %count = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
    247   ret i32 %count
    248 }
    249 
    250 define ptx_device i32 @test_pm1() {
        ; Reads %pm1 and returns it; only the emitted PTX is verified.
    251 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
    252 ; CHECK: ret;
    253   %count = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
    254   ret i32 %count
    255 }
    256 
    257 define ptx_device i32 @test_pm2() {
        ; Reads %pm2 and returns it; only the emitted PTX is verified.
    258 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
    259 ; CHECK: ret;
    260   %count = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
    261   ret i32 %count
    262 }
    263 
    264 define ptx_device i32 @test_pm3() {
        ; Reads %pm3 and returns it; only the emitted PTX is verified.
    265 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
    266 ; CHECK: ret;
    267   %count = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
    268   ret i32 %count
    269 }
    270 
    271 define ptx_device void @test_bar_sync() {
        ; Calls the barrier intrinsic with a constant id of 0 and returns; the
        ; PTX is expected to contain the matching bar.sync instruction.
    272 ; CHECK: bar.sync 0
    273 ; CHECK: ret;
    274   call void @llvm.nvvm.bar.sync(i32 0)
    275   ret void
    276 }
    277 
    278 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()      ; tid.* and ntid.* readers
    279 declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
    280 declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
    281 declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
    282 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
    283 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
    284 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
    285 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
    286 
    287 declare i32 @llvm.nvvm.read.ptx.sreg.laneid()     ; lane / warp readers, in test order
    288 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
    289 declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
    290 declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
    291 
    292 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()    ; ctaid.* and nctaid.* readers
    293 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
    294 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
    295 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
    296 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
    297 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
    298 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
    299 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
    300 
    301 declare i32 @llvm.nvvm.read.ptx.sreg.smid()       ; smid / nsmid / gridid readers
    302 declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
    303 declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
    304 
    305 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq() ; lanemask.* readers
    306 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
    307 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
    308 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
    309 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
    310 
    311 declare i32 @llvm.nvvm.read.ptx.sreg.clock()      ; clock readers; clock64 is the only i64 one
    312 declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
    313 
    314 declare i32 @llvm.nvvm.read.ptx.sreg.pm0()        ; pm0..pm3 readers
    315 declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
    316 declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
    317 declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
    318 
    319 declare void @llvm.nvvm.bar.sync(i32)             ; barrier; takes the barrier id
    320 
    321 !0 = !{i32 0, i32 19}
        ; !0 is the range already attached to the call in
        ; test_already_has_range_md. The directives below pin the bounds of
        ; each metadata node captured by the functions above. Every node is
        ; listed exactly once, so the unordered matches never need to overlap.
    322 ; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
    323 ; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
    325 ; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
    326 ; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
    327 ; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
    328 ; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
    329 ; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
    330 ; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
    331 ; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
    332 ; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
    333 ; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}
    334