Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
      2 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
      3 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
      4 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      5 ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
      6 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      7 
      8 ; OPT-LABEL: @test_sink_global_small_offset_i32(
      9 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
     10 ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
     11 ; OPT: br i1
     12 ; OPT-CI: ptrtoint
     13 
     14 ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
     15 ; GCN: {{^}}BB0_2:
     16 define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; global i32 load in %if whose address GEP is defined up in %entry
     17 entry:
     18   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 ; store address, used only in %endif
     19   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7 ; element 7 = byte offset 28; sole user is the load in %if
     20   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     21   %tmp0 = icmp eq i32 %tid, 0
     22   br i1 %tmp0, label %endif, label %if ; %if (the load) runs only when %tid is nonzero
     23 
     24 if:
     25   %tmp1 = load i32, i32 addrspace(1)* %in.gep
     26   br label %endif
     27 
     28 endif:
     29   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
     30   store i32 %x, i32 addrspace(1)* %out.gep
     31   br label %done
     32 
     33 done:
     34   ret void
     35 }
     36 
     37 ; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
     38 ; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
     39 ; OPT: br i1
     40 
     41 ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
     42 ; GCN: s_and_saveexec_b64
     43 ; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
     44 ; GCN: {{^}}BB1_2:
     45 ; GCN: s_or_b64 exec
     46 define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ; global i8 load at byte offset 65535, conditionally executed
     47 entry:
     48   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
     49   %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535 ; 65535 = 0xffff; the opt checks above expect this GEP to remain ahead of the branch
     50   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     51   %tmp0 = icmp eq i32 %tid, 0
     52   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
     53 
     54 if:
     55   %tmp1 = load i8, i8 addrspace(1)* %in.gep
     56   %tmp2 = sext i8 %tmp1 to i32 ; sign-extended byte; the llc checks above look for buffer_load_sbyte
     57   br label %endif
     58 
     59 endif:
     60   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] ; extended byte, or 0 when the load was skipped
     61   store i32 %x, i32 addrspace(1)* %out.gep
     62   br label %done
     63 
     64 done:
     65   ret void
     66 }
     67 
     68 ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
     69 ; GCN: s_and_saveexec_b64
     70 ; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
     71 ; GCN: {{^}}BB2_2:
     72 ; GCN: s_or_b64 exec
     73 define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ; global i8 load at byte offset 4095, conditionally executed
     74 entry:
     75   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 ; note the i32 index here; the sibling tests index %out with i64
     76   %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095 ; the llc checks above expect this to appear as an immediate offset of 4095 on the load
     77   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
     78   %tmp0 = icmp eq i32 %tid, 0
     79   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
     80 
     81 if:
     82   %tmp1 = load i8, i8 addrspace(1)* %in.gep
     83   %tmp2 = sext i8 %tmp1 to i32
     84   br label %endif
     85 
     86 endif:
     87   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] ; extended byte, or 0 when the load was skipped
     88   store i32 %x, i32 addrspace(1)* %out.gep
     89   br label %done
     90 
     91 done:
     92   ret void
     93 }
     94 
     95 ; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
     96 ; GCN: s_and_saveexec_b64
     97 ; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
     98 ; GCN: {{^}}BB3_2:
     99 ; GCN: s_or_b64 exec
    100 define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ; same shape as the 4095 case, one byte further
    101 entry:
    102   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
    103   %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096 ; the llc checks above expect the load to end in an s-register operand with no immediate offset field
    104   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    105   %tmp0 = icmp eq i32 %tid, 0
    106   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    107 
    108 if:
    109   %tmp1 = load i8, i8 addrspace(1)* %in.gep
    110   %tmp2 = sext i8 %tmp1 to i32
    111   br label %endif
    112 
    113 endif:
    114   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] ; extended byte, or 0 when the load was skipped
    115   store i32 %x, i32 addrspace(1)* %out.gep
    116   br label %done
    117 
    118 done:
    119   ret void
    120 }
    121 
    122 ; OPT-LABEL: @test_sink_scratch_small_offset_i32(
    123 ; OPT-NOT:  getelementptr [512 x i32]
    124 ; OPT: br i1
    125 ; OPT: ptrtoint
    126 
    127 ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
    128 ; GCN: s_and_saveexec_b64
    129 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
    130 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
    131 ; GCN: {{^}}BB4_2:
    132 define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { ; volatile store/load through an alloca GEP at byte offset 4092
    133 entry:
    134   %alloca = alloca [512 x i32], align 4
    135   %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
    136   %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    137   %add.arg = add i32 %arg, 8 ; result has no users in this function
    138   %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023 ; index 1023 = byte offset 4092; lies beyond the 512 declared elements
    139   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    140   %tmp0 = icmp eq i32 %tid, 0
    141   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    142 
    143 if:
    144   store volatile i32 123, i32* %alloca.gep ; the llc checks above expect both accesses to carry an immediate offset of 4092
    145   %tmp1 = load volatile i32, i32* %alloca.gep
    146   br label %endif
    147 
    148 endif:
    149   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; value read back in %if, or 0 when %if was skipped
    150   store i32 %x, i32 addrspace(1)* %out.gep.0
    151   %load = load volatile i32, i32* %alloca.gep ; second user of %alloca.gep, in a different block from the first
    152   store i32 %load, i32 addrspace(1)* %out.gep.1
    153   br label %done
    154 
    155 done:
    156   ret void
    157 }
    158 
    159 ; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
    160 ; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
    161 ; OPT: br i1
    162 ; OPT-NOT: ptrtoint
    163 
    164 ; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
    165 ; GCN: s_and_saveexec_b64
    166 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
    167 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
    168 ; GCN: {{^}}BB5_2:
    169 define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { ; same shape as the 4092-byte scratch test, with the offset raised to 4096
    170 entry:
    171   %alloca = alloca [512 x i32], align 4
    172   %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
    173   %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    174   %add.arg = add i32 %arg, 8 ; result has no users in this function
    175   %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024 ; index 1024 = byte offset 4096; the opt checks above expect this GEP to stay in place
    176   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    177   %tmp0 = icmp eq i32 %tid, 0
    178   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    179 
    180 if:
    181   store volatile i32 123, i32* %alloca.gep ; the llc checks above expect both accesses to use offen with no immediate offset field
    182   %tmp1 = load volatile i32, i32* %alloca.gep
    183   br label %endif
    184 
    185 endif:
    186   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; value read back in %if, or 0 when %if was skipped
    187   store i32 %x, i32 addrspace(1)* %out.gep.0
    188   %load = load volatile i32, i32* %alloca.gep
    189   store i32 %load, i32 addrspace(1)* %out.gep.1
    190   br label %done
    191 
    192 done:
    193   ret void
    194 }
    195 
    196 ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
    197 ; GCN: s_and_saveexec_b64
    198 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    199 ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
    200 ; GCN: {{^}}BB6_2:
    201 define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) { ; global i32 load whose index is a function argument instead of a constant
    202 entry:
    203   %offset.ext = zext i32 %offset to i64
    204   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    205   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext ; variable index; sole user is the load in %if
    206   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    207   %tmp0 = icmp eq i32 %tid, 0
    208   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    209 
    210 if:
    211   %tmp1 = load i32, i32 addrspace(1)* %in.gep ; checks above expect an addr64 buffer load on bonaire and a flat load on tonga
    212   br label %endif
    213 
    214 endif:
    215   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    216   store i32 %x, i32 addrspace(1)* %out.gep
    217   br label %done
    218 
    219 done:
    220   ret void
    221 }
    222 
    223 attributes #0 = { nounwind readnone } ; attached to the mbcnt.lo calls and declaration; the same group is spelled out again at the end of the file
    224 attributes #1 = { nounwind } ; nothing visible in this file references group 1
    225 
    226 
    227 
    228 ; OPT-LABEL: @test_sink_constant_small_offset_i32
    229 ; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
    230 ; OPT: br i1
    231 
    232 ; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
    233 ; GCN: s_and_saveexec_b64
    234 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
    235 ; GCN: s_or_b64 exec, exec
    236 define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; addrspace(2) i32 load at a small constant index, conditionally executed
    237 entry:
    238   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    239   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7 ; index 7; the tahiti check above expects s_load_dword with immediate 0x7
    240   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    241   %tmp0 = icmp eq i32 %tid, 0
    242   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    243 
    244 if:
    245   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    246   br label %endif
    247 
    248 endif:
    249   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    250   store i32 %x, i32 addrspace(1)* %out.gep
    251   br label %done
    252 
    253 done:
    254   ret void
    255 }
    256 
    257 ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
    258 ; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
    259 ; OPT: br i1
    260 
    261 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
    262 ; GCN: s_and_saveexec_b64
    263 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
    264 ; GCN: s_or_b64 exec, exec
    265 define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; addrspace(2) i32 load at index 255, the largest value that fits in 8 bits
    266 entry:
    267   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    268   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255 ; 255 = 0xff; the tahiti check above expects s_load_dword with immediate 0xff
    269   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    270   %tmp0 = icmp eq i32 %tid, 0
    271   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    272 
    273 if:
    274   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    275   br label %endif
    276 
    277 endif:
    278   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    279   store i32 %x, i32 addrspace(1)* %out.gep
    280   br label %done
    281 
    282 done:
    283   ret void
    284 }
    285 
    286 ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
    287 ; OPT-SI:  getelementptr i32, i32 addrspace(2)*
    288 ; OPT-CI-NOT:  getelementptr i32, i32 addrspace(2)*
    289 ; OPT-VI-NOT:  getelementptr i32, i32 addrspace(2)*
    290 ; OPT: br i1
    291 
    292 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
    293 ; GCN: s_and_saveexec_b64
    294 ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
    295 
    296 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    297 ; GCN: s_or_b64 exec, exec
    298 define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; addrspace(2) i32 load at index 256, one past the 8-bit maximum
    299 entry:
    300   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    301   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256 ; byte offset 1024 = 0x400; the tahiti checks above expect 0x400 moved into an s-register used as the load offset
    302   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    303   %tmp0 = icmp eq i32 %tid, 0
    304   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    305 
    306 if:
    307   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    308   br label %endif
    309 
    310 endif:
    311   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    312   store i32 %x, i32 addrspace(1)* %out.gep
    313   br label %done
    314 
    315 done:
    316   ret void
    317 }
    318 
    319 ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
    320 ; OPT-SI: getelementptr i32, i32 addrspace(2)*
    321 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
    322 ; OPT: br i1
    323 
    324 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
    325 ; GCN: s_and_saveexec_b64
    326 ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
    327 ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
    328 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
    329 ; GCN: s_or_b64 exec, exec
    330 define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; addrspace(2) i32 load at index 2^32 - 1
    331 entry:
    332   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    333   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295 ; byte offset 0x3fffffffc; the llc checks above expect it added as low word -4 and high word 3
    334   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    335   %tmp0 = icmp eq i32 %tid, 0
    336   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    337 
    338 if:
    339   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    340   br label %endif
    341 
    342 endif:
    343   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    344   store i32 %x, i32 addrspace(1)* %out.gep
    345   br label %done
    346 
    347 done:
    348   ret void
    349 }
    350 
    351 ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
    352 ; OPT: getelementptr i32, i32 addrspace(2)*
    353 ; OPT: br i1
    354 
    355 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
    356 ; GCN: s_and_saveexec_b64
    357 ; GCN: s_add_u32
    358 ; GCN: s_addc_u32
    359 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
    360 ; GCN: s_or_b64 exec, exec
    361 define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; addrspace(2) i32 load at an index wider than 32 bits
    362 entry:
    363   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    364   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181 ; 17179869181 = 2^34 - 3. NOTE(review) - the _p1 name suggests 2^32; confirm this constant is intended
    365   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    366   %tmp0 = icmp eq i32 %tid, 0
    367   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    368 
    369 if:
    370   %tmp1 = load i32, i32 addrspace(2)* %in.gep ; the opt check above expects the GEP to still precede the branch
    371   br label %endif
    372 
    373 endif:
    374   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    375   store i32 %x, i32 addrspace(1)* %out.gep
    376   br label %done
    377 
    378 done:
    379   ret void
    380 }
    381 
    382 ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
    383 ; GCN: s_and_saveexec_b64
    384 ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
    385 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    386 
    387 ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
    388 ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
    389 
    390 ; GCN: s_or_b64 exec, exec
    391 define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; addrspace(2) i32 load whose byte offset is the largest dword-aligned 20-bit value
    392 entry:
    393   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    394   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143 ; index 0x3ffff = byte offset 0xffffc; both encodings appear in the per-cpu checks above
    395   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    396   %tmp0 = icmp eq i32 %tid, 0
    397   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    398 
    399 if:
    400   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    401   br label %endif
    402 
    403 endif:
    404   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    405   store i32 %x, i32 addrspace(1)* %out.gep
    406   br label %done
    407 
    408 done:
    409   ret void
    410 }
    411 
    412 ; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
    413 ; OPT-SI: getelementptr i32, i32 addrspace(2)*
    414 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
    415 ; OPT-VI: getelementptr i32, i32 addrspace(2)*
    416 ; OPT: br i1
    417 
    418 ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
    419 ; GCN: s_and_saveexec_b64
    420 ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
    421 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    422 
    423 ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
    424 
    425 ; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
    426 ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
    427 
    428 ; GCN: s_or_b64 exec, exec
    429 define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { ; addrspace(2) i32 load whose byte offset needs 21 bits
    430 entry:
    431   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
    432   %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144 ; index 0x40000 = byte offset 0x100000; checks above expect an s-register offset on tahiti and tonga, an immediate on bonaire
    433   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    434   %tmp0 = icmp eq i32 %tid, 0
    435   br i1 %tmp0, label %endif, label %if ; %if runs only when %tid is nonzero
    436 
    437 if:
    438   %tmp1 = load i32, i32 addrspace(2)* %in.gep
    439   br label %endif
    440 
    441 endif:
    442   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] ; loaded value, or 0 when the load was skipped
    443   store i32 %x, i32 addrspace(1)* %out.gep
    444   br label %done
    445 
    446 done:
    447   ret void
    448 }
    449 
    450 %struct.foo = type { [3 x float], [3 x float] } ; two 3-float arrays; field 1 begins at byte offset 12
    451 
    452 ; OPT-LABEL: @sink_ds_address(
    453 ; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64
    454 
    455 ; GCN-LABEL: {{^}}sink_ds_address:
    456 ; GCN: s_load_dword [[SREG1:s[0-9]+]],
    457 ; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
    458 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
    459 define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind { ; two addrspace(3) float loads off one base pointer, with the GEPs in a different block from the loads
    460 entry:
    461   %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 ; field 1, element 0 = byte offset 12
    462   %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2 ; field 1, element 2 = byte offset 20
    463   br label %bb32
    464 
    465 bb32:
    466   %a = load float, float addrspace(3)* %x, align 4 ; the llc check above expects one ds_read2_b32 with dword offsets 3 and 5 covering both loads
    467   %b = load float, float addrspace(3)* %y, align 4
    468   %cmp = fcmp one float %a, %b
    469   br i1 %cmp, label %bb34, label %bb33 ; both successors are unreachable; the compare only gives the loads a user
    470 
    471 bb33:
    472   unreachable
    473 
    474 bb34:
    475   unreachable
    476 }
    477 
    478 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 ; intrinsic whose result feeds the branch condition in the conditional-load tests above
    479 
    480 attributes #0 = { nounwind readnone } ; same contents as the group 0 definition earlier in the file
    481