Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
      2 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
      3 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
      4 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      5 ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
      6 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      7 
      8 declare i32 @llvm.r600.read.tidig.x() #0
        ; NOTE(review): no function visible in this file calls
        ; @llvm.r600.read.tidig.x; the declaration may be a leftover -- confirm
        ; before removing it.
      9 
     10 ; OPT-LABEL: @test_sink_global_small_offset_i32(
     11 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
     12 ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
     13 ; OPT: br i1
     14 ; OPT-CI: ptrtoint
     15 
     16 ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
     17 ; GCN: {{^}}BB0_2:
     18 define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 7 of %in (addrspace(1)) and
          ; stores the loaded value, or 0 when the load is skipped, to element
          ; 999999 of %out.
     19 entry:
          ; Both addresses are formed ahead of the branch, although %in.gep is only
          ; used by the load in %if. The opt check lines above expect that GEP to
          ; be gone before the branch for the bonaire run and still present for
          ; the tonga run.
     20   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
     21   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
     22   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
     23   br i1 %tmp0, label %endif, label %if
     24 
     25 if:
     26   %tmp1 = load i32, i32 addrspace(1)* %in.gep
     27   br label %endif
     28 
     29 endif:
          ; 0 is selected when control arrived directly from %entry.
     30   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
     31   store i32 %x, i32 addrspace(1)* %out.gep
     32   br label %done
     33 
     34 done:
     35   ret void
     36 }
     37 
     38 ; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
     39 ; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
     40 ; OPT: br i1
     41 
     42 ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
     43 ; GCN: s_and_saveexec_b64
     44 ; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
     45 ; GCN: {{^}}BB1_2:
     46 ; GCN: s_or_b64 exec
     47 define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
          ; Conditionally loads an i8 at byte offset 65535 from %in (addrspace(1)),
          ; sign-extends it to i32, and stores that value (or 0 when the load is
          ; skipped) to element 99999 of %out.
     48 entry:
     49   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
          ; The opt check lines above expect this exact GEP to remain ahead of the
          ; branch; the llc check lines expect the load to take its offset from a
          ; scalar register operand.
     50   %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
     51   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
     52   br i1 %tmp0, label %endif, label %if
     53 
     54 if:
     55   %tmp1 = load i8, i8 addrspace(1)* %in.gep
     56   %tmp2 = sext i8 %tmp1 to i32
     57   br label %endif
     58 
     59 endif:
          ; 0 is selected when control arrived directly from %entry.
     60   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
     61   store i32 %x, i32 addrspace(1)* %out.gep
     62   br label %done
     63 
     64 done:
     65   ret void
     66 }
     67 
     68 ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
     69 ; GCN: s_and_saveexec_b64
     70 ; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
     71 ; GCN: {{^}}BB2_2:
     72 ; GCN: s_or_b64 exec
     73 define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
          ; Conditionally loads an i8 at byte offset 4095 from %in (addrspace(1)),
          ; sign-extends it to i32, and stores that value (or 0 when the load is
          ; skipped) to element 1024 of %out.
     74 entry:
          ; Unlike the neighbouring tests, the %out index here is an i32 constant.
     75   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
          ; The llc check lines above expect 4095 to show up as the immediate
          ; `offset:4095` on the byte load.
     76   %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
     77   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
     78   br i1 %tmp0, label %endif, label %if
     79 
     80 if:
     81   %tmp1 = load i8, i8 addrspace(1)* %in.gep
     82   %tmp2 = sext i8 %tmp1 to i32
     83   br label %endif
     84 
     85 endif:
          ; 0 is selected when control arrived directly from %entry.
     86   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
     87   store i32 %x, i32 addrspace(1)* %out.gep
     88   br label %done
     89 
     90 done:
     91   ret void
     92 }
     93 
     94 ; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
     95 ; GCN: s_and_saveexec_b64
     96 ; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
     97 ; GCN: {{^}}BB3_2:
     98 ; GCN: s_or_b64 exec
     99 define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
          ; Same shape as the byte-offset-4095 test, but the load address is one
          ; byte further (4096). The result (or 0 when the load is skipped) is
          ; stored to element 99999 of %out.
    100 entry:
    101   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
          ; The llc check lines above expect the byte load to end in a scalar
          ; register operand rather than an immediate `offset:` field.
    102   %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
    103   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    104   br i1 %tmp0, label %endif, label %if
    105 
    106 if:
    107   %tmp1 = load i8, i8 addrspace(1)* %in.gep
    108   %tmp2 = sext i8 %tmp1 to i32
    109   br label %endif
    110 
    111 endif:
          ; 0 is selected when control arrived directly from %entry.
    112   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
    113   store i32 %x, i32 addrspace(1)* %out.gep
    114   br label %done
    115 
    116 done:
    117   ret void
    118 }
    119 
    120 ; OPT-LABEL: @test_sink_scratch_small_offset_i32(
    121 ; OPT-NOT:  getelementptr [512 x i32]
    122 ; OPT: br i1
    123 ; OPT: ptrtoint
    124 
    125 ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
    126 ; GCN: s_and_saveexec_b64
    127 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
    128 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
    129 ; GCN: {{^}}BB4_2:
    130 define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
          ; Stack (alloca) variant: conditionally performs a volatile store and
          ; load through one fixed slot of a local [512 x i32] array, reloads the
          ; same slot after the join, and writes both results to %out.
          ; %in is not referenced anywhere in the body.
    131 entry:
    132   %alloca = alloca [512 x i32], align 4
    133   %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
    134   %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; %add.arg has no users in this function.
    135   %add.arg = add i32 %arg, 8
          ; Index 1023 is past the 512 declared elements; 1023 * 4 = 4092 bytes,
          ; which is the `offset:4092` value the llc check lines above expect.
          ; NOTE(review): presumably only the byte offset matters here -- confirm.
    136   %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
    137   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the volatile store/load pair.
    138   br i1 %tmp0, label %endif, label %if
    139 
    140 if:
    141   store volatile i32 123, i32* %alloca.gep
    142   %tmp1 = load volatile i32, i32* %alloca.gep
    143   br label %endif
    144 
    145 endif:
          ; 0 is selected when control arrived directly from %entry.
    146   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    147   store i32 %x, i32 addrspace(1)* %out.gep.0
          ; Second use of %alloca.gep, this time outside the conditional block.
    148   %load = load volatile i32, i32* %alloca.gep
    149   store i32 %load, i32 addrspace(1)* %out.gep.1
    150   br label %done
    151 
    152 done:
    153   ret void
    154 }
    155 
    156 ; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
    157 ; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
    158 ; OPT: br i1
    159 ; OPT-NOT: ptrtoint
    160 
    161 ; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
    162 ; GCN: s_and_saveexec_b64
    163 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
    164 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
    165 ; GCN: {{^}}BB5_2:
    166 define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
          ; Same shape as the index-1023 alloca test, but the slot index is 1024.
          ; The opt check lines above expect the GEP to stay ahead of the branch
          ; with no ptrtoint afterwards, and the llc check lines expect `offen`
          ; accesses without an immediate offset.
          ; %in is not referenced anywhere in the body.
    167 entry:
    168   %alloca = alloca [512 x i32], align 4
    169   %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
    170   %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; %add.arg has no users in this function.
    171   %add.arg = add i32 %arg, 8
          ; Index 1024 is past the 512 declared elements; 1024 * 4 = 4096 bytes.
    172   %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
    173   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the volatile store/load pair.
    174   br i1 %tmp0, label %endif, label %if
    175 
    176 if:
    177   store volatile i32 123, i32* %alloca.gep
    178   %tmp1 = load volatile i32, i32* %alloca.gep
    179   br label %endif
    180 
    181 endif:
          ; 0 is selected when control arrived directly from %entry.
    182   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    183   store i32 %x, i32 addrspace(1)* %out.gep.0
          ; Second use of %alloca.gep, this time outside the conditional block.
    184   %load = load volatile i32, i32* %alloca.gep
    185   store i32 %load, i32 addrspace(1)* %out.gep.1
    186   br label %done
    187 
    188 done:
    189   ret void
    190 }
    191 
    192 ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
    193 ; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
    194 ; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
    195 ; GCN: s_and_saveexec_b64
    196 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    197 ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
    198 ; GCN: {{^}}BB6_2:
    199 define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
          ; Variable-index variant: conditionally loads an i32 from
          ; %in[zext(%offset)] (addrspace(1)) and stores the loaded value, or 0
          ; when the load is skipped, to element 999999 of %out.
    200 entry:
          ; The index is zero-extended to i64 before being used in the GEP.
    201   %offset.ext = zext i32 %offset to i64
    202   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    203   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
    204   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    205   br i1 %tmp0, label %endif, label %if
    206 
    207 if:
    208   %tmp1 = load i32, i32 addrspace(1)* %in.gep
    209   br label %endif
    210 
    211 endif:
          ; 0 is selected when control arrived directly from %entry.
    212   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    213   store i32 %x, i32 addrspace(1)* %out.gep
    214   br label %done
    215 
    216 done:
    217   ret void
    218 }
    219 
    220 attributes #0 = { nounwind readnone }
    221 attributes #1 = { nounwind }
        ; NOTE(review): #0 is attached to the @llvm.r600.read.tidig.x declaration
        ; near the top of the file; #1 has no use visible in this file -- confirm
        ; before removing it.
    222 
    223 
    224 
    225 ; OPT-LABEL: @test_sink_constant_small_offset_i32
    226 ; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
    227 ; OPT: br i1
    228 
    229 ; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
    230 ; GCN: s_and_saveexec_b64
    231 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
    232 ; GCN: s_or_b64 exec, exec
    233 define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 7 of %in (addrspace(2)) and
          ; stores the loaded value, or 0 when the load is skipped, to element
          ; 999999 of %out (addrspace(1)).
    234 entry:
    235   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; Only used by the load in %if. The opt check lines above expect no
          ; addrspace(2) GEP to remain ahead of the branch, and the tahiti llc
          ; check expects the scalar load to carry the immediate 0x7.
    236   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
    237   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    238   br i1 %tmp0, label %endif, label %if
    239 
    240 if:
    241   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    242   br label %endif
    243 
    244 endif:
          ; 0 is selected when control arrived directly from %entry.
    245   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    246   store i32 %x, i32 addrspace(1)* %out.gep
    247   br label %done
    248 
    249 done:
    250   ret void
    251 }
    252 
    253 ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
    254 ; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
    255 ; OPT: br i1
    256 
    257 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
    258 ; GCN: s_and_saveexec_b64
    259 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
    260 ; GCN: s_or_b64 exec, exec
    261 define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 255 of %in (addrspace(2)) and
          ; stores the loaded value, or 0 when the load is skipped, to element
          ; 999999 of %out (addrspace(1)).
    262 entry:
    263   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; 255 = 0xff, the immediate the tahiti llc check above expects on the
          ; scalar load. The opt check lines expect no addrspace(2) GEP to remain
          ; ahead of the branch.
    264   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
    265   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    266   br i1 %tmp0, label %endif, label %if
    267 
    268 if:
    269   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    270   br label %endif
    271 
    272 endif:
          ; 0 is selected when control arrived directly from %entry.
    273   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    274   store i32 %x, i32 addrspace(1)* %out.gep
    275   br label %done
    276 
    277 done:
    278   ret void
    279 }
    280 
    281 ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
    282 ; OPT-SI:  getelementptr i32, i32 addrspace(2)*
    283 ; OPT-CI-NOT:  getelementptr i32, i32 addrspace(2)*
    284 ; OPT-VI-NOT:  getelementptr i32, i32 addrspace(2)*
    285 ; OPT: br i1
    286 
    287 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
    288 ; GCN: s_and_saveexec_b64
    289 ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
    290 
    291 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    292 ; GCN: s_or_b64 exec, exec
    293 define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 256 of %in (addrspace(2)), one
          ; past the index used by the 255 test, and stores the loaded value (or 0
          ; when the load is skipped) to element 999999 of %out (addrspace(1)).
    294 entry:
    295   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; 256 * 4 = 1024 = 0x400, the value the tahiti llc checks above expect to
          ; be materialised in a scalar register. The opt check lines expect the
          ; GEP to remain ahead of the branch only for the tahiti run.
    296   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
    297   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    298   br i1 %tmp0, label %endif, label %if
    299 
    300 if:
    301   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    302   br label %endif
    303 
    304 endif:
          ; 0 is selected when control arrived directly from %entry.
    305   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    306   store i32 %x, i32 addrspace(1)* %out.gep
    307   br label %done
    308 
    309 done:
    310   ret void
    311 }
    312 
    313 ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
    314 ; OPT-SI: getelementptr i32, i32 addrspace(2)*
    315 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
    316 ; OPT: br i1
    317 
    318 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
    319 ; GCN: s_and_saveexec_b64
    320 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
    321 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
    322 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
    323 ; GCN: s_or_b64 exec, exec
    324 define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 4294967295 of %in
          ; (addrspace(2)) and stores the loaded value, or 0 when the load is
          ; skipped, to element 999999 of %out (addrspace(1)).
    325 entry:
    326   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; 4294967295 * 4 bytes = 0x3FFFFFFFC: low word 0xFFFFFFFC (-4), high
          ; word 3, matching the add / add-with-carry operands in the llc check
          ; lines above.
    327   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
    328   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    329   br i1 %tmp0, label %endif, label %if
    330 
    331 if:
    332   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    333   br label %endif
    334 
    335 endif:
          ; 0 is selected when control arrived directly from %entry.
    336   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    337   store i32 %x, i32 addrspace(1)* %out.gep
    338   br label %done
    339 
    340 done:
    341   ret void
    342 }
    343 
    344 ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
    345 ; OPT: getelementptr i32, i32 addrspace(2)*
    346 ; OPT: br i1
    347 
    348 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
    349 ; GCN: s_and_saveexec_b64
    350 ; GCN: s_add_u32
    351 ; GCN: s_addc_u32
    352 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
    353 ; GCN: s_or_b64 exec, exec
    354 define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 17179869181 of %in
          ; (addrspace(2)) and stores the loaded value, or 0 when the load is
          ; skipped, to element 999999 of %out (addrspace(1)).
    355 entry:
    356   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; The opt check lines above expect this GEP to remain ahead of the
          ; branch for every run.
          ; NOTE(review): 17179869181 = 4 * 4294967295 + 1, not 4294967295 + 1 as
          ; the _p1 suffix might suggest -- confirm the intended constant.
    357   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
    358   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    359   br i1 %tmp0, label %endif, label %if
    360 
    361 if:
    362   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    363   br label %endif
    364 
    365 endif:
          ; 0 is selected when control arrived directly from %entry.
    366   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    367   store i32 %x, i32 addrspace(1)* %out.gep
    368   br label %done
    369 
    370 done:
    371   ret void
    372 }
    373 
    374 ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
    375 ; GCN: s_and_saveexec_b64
    376 ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
    377 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    378 
    379 ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
    380 ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
    381 
    382 ; GCN: s_or_b64 exec, exec
    383 define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 262143 of %in (addrspace(2))
          ; and stores the loaded value, or 0 when the load is skipped, to element
          ; 999999 of %out (addrspace(1)). Only llc output is checked for this
          ; function; there are no opt check lines above it.
    384 entry:
    385   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; 262143 = 0x3ffff elements = 0xffffc bytes; the llc check lines above
          ; expect 0x3ffff for bonaire and 0xffffc for tahiti and tonga.
    386   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
    387   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    388   br i1 %tmp0, label %endif, label %if
    389 
    390 if:
    391   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    392   br label %endif
    393 
    394 endif:
          ; 0 is selected when control arrived directly from %entry.
    395   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    396   store i32 %x, i32 addrspace(1)* %out.gep
    397   br label %done
    398 
    399 done:
    400   ret void
    401 }
    402 
    403 ; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
    404 ; OPT-SI: getelementptr i32, i32 addrspace(2)*
    405 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
    406 ; OPT-VI: getelementptr i32, i32 addrspace(2)*
    407 ; OPT: br i1
    408 
    409 ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
    410 ; GCN: s_and_saveexec_b64
    411 ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
    412 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    413 
    414 ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
    415 
    416 ; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
    417 ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    418 
    419 ; GCN: s_or_b64 exec, exec
    420 define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
          ; Conditionally loads an i32 from element 262144 of %in (addrspace(2)),
          ; one past the index used by the 262143 test, and stores the loaded
          ; value (or 0 when the load is skipped) to element 999999 of %out
          ; (addrspace(1)).
    421 entry:
    422   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
          ; 262144 = 0x40000 elements = 0x100000 bytes; the llc check lines above
          ; expect 0x40000 as an immediate for bonaire and 0x100000 moved into a
          ; scalar register for tahiti and tonga.
    423   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
    424   %tmp0 = icmp eq i32 %cond, 0
          ; %cond == 0 skips the load and goes straight to %endif.
    425   br i1 %tmp0, label %endif, label %if
    426 
    427 if:
    428   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    429   br label %endif
    430 
    431 endif:
          ; 0 is selected when control arrived directly from %entry.
    432   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    433   store i32 %x, i32 addrspace(1)* %out.gep
    434   br label %done
    435 
    436 done:
    437   ret void
    438 }
    439