Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s
      2 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s
      3 ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s
      4 ; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
      5 ; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
      6 ; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
      7 
      8 ; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
      9 ; OPT-CIVI: getelementptr i32, i32* %in
     10 ; OPT-CIVI: br i1
     11 ; OPT-CIVI-NOT: ptrtoint
     12 
     13 ; OPT-GFX9: br
     14 ; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %0, i64 28
     15 ; OPT-GFX9: %1 = bitcast i8* %sunkaddr to i32*
     16 ; OPT-GFX9: load i32, i32* %1
     17 
     18 ; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
     19 ; GCN: flat_load_dword
     20 ; GCN: {{^}}BB0_2:
     21 define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) {
        ; Fixture: conditionally loads an i32 from %in + 7 elements and stores the
        ; loaded value (or 0 when the load is skipped) to %out + 999999 elements.
        ; %in.gep is computed in 'entry' but its only use is the load in 'if';
        ; the check lines before this function inspect where that address
        ; computation ends up on each target.
     22 entry:
     23   %out.gep = getelementptr i32, i32* %out, i64 999999
          ; 7 x i32 = 28 bytes past %in (the gfx900 opt checks expect an i8 offset of 28).
     24   %in.gep = getelementptr i32, i32* %in, i64 7
          ; Skip the load entirely when %cond == 0.
     25   %tmp0 = icmp eq i32 %cond, 0
     26   br i1 %tmp0, label %endif, label %if
     27 
     28 if:
     29   %tmp1 = load i32, i32* %in.gep
     30   br label %endif
     31 
     32 endif:
          ; %x is the loaded value when arriving from 'if', 0 when arriving from 'entry'.
     33   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
     34   store i32 %x, i32* %out.gep
     35   br label %done
     36 
     37 done:
     38   ret void
     39 }
     40 
     41 ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
     42 ; OPT: getelementptr i32, i32* %out,
     43 ; OPT-CI-NOT: getelementptr
     44 ; OPT: br i1
     45 
     46 ; OPT-CI: addrspacecast
     47 ; OPT-CI: getelementptr
     48 ; OPT-CI: bitcast
     49 ; OPT: br label
     50 
     51 ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
     52 ; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
     53 define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32* %out, i32* %in, i32 %cond) {
        ; Fixture: same conditional-load shape as the first test, except the load
        ; address is %in + 7 elements cast to addrspace(1) (the "global" of the
        ; test name). Both the GEP and the addrspacecast live in 'entry' while
        ; their only use is the load in 'if'.
     54 entry:
     55   %out.gep = getelementptr i32, i32* %out, i64 999999
          ; 7 x i32 = 28 bytes; the bonaire llc check expects this as offset:28 on the load.
     56   %in.gep = getelementptr i32, i32* %in, i64 7
     57   %cast = addrspacecast i32* %in.gep to i32 addrspace(1)*
          ; Skip the load entirely when %cond == 0.
     58   %tmp0 = icmp eq i32 %cond, 0
     59   br i1 %tmp0, label %endif, label %if
     60 
     61 if:
          ; Load goes through the addrspace(1) pointer, not the original pointer.
     62   %tmp1 = load i32, i32 addrspace(1)* %cast
     63   br label %endif
     64 
     65 endif:
          ; %x is the loaded value when arriving from 'if', 0 when arriving from 'entry'.
     66   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
     67   store i32 %x, i32* %out.gep
     68   br label %done
     69 
     70 done:
     71   ret void
     72 }
     73 
     74 ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
     75 ; OPT: getelementptr i32, i32* %out,
     76 ; OPT-CI-NOT: getelementptr
     77 ; OPT: br i1
     78 
     79 ; OPT-CI: addrspacecast
     80 ; OPT-CI: getelementptr
     81 ; OPT-CI: bitcast
     82 ; OPT: br label
     83 
     84 ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32:
     85 ; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
     86 define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32* %out, i32* %in, i32 %cond) {
        ; Fixture: identical in shape to the flat-to-global test, but the load
        ; address is cast to addrspace(4) (the "constant" of the test name).
        ; The GEP and the addrspacecast are in 'entry'; their only use is the
        ; load in 'if'.
     87 entry:
     88   %out.gep = getelementptr i32, i32* %out, i64 999999
          ; 7 x i32 = 28 bytes past %in.
     89   %in.gep = getelementptr i32, i32* %in, i64 7
     90   %cast = addrspacecast i32* %in.gep to i32 addrspace(4)*
          ; Skip the load entirely when %cond == 0.
     91   %tmp0 = icmp eq i32 %cond, 0
     92   br i1 %tmp0, label %endif, label %if
     93 
     94 if:
          ; Load goes through the addrspace(4) pointer; the bonaire llc check
          ; expects a scalar load (s_load_dword) here.
     95   %tmp1 = load i32, i32 addrspace(4)* %cast
     96   br label %endif
     97 
     98 endif:
          ; %x is the loaded value when arriving from 'if', 0 when arriving from 'entry'.
     99   %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
    100   store i32 %x, i32* %out.gep
    101   br label %done
    102 
    103 done:
    104   ret void
    105 }
    106 
    107 ; OPT-LABEL: @test_sink_flat_small_max_flat_offset(
    108 ; OPT-CIVI: %in.gep = getelementptr i8, i8* %in, i64 4095
    109 ; OPT-CIVI: br
    110 ; OPT-CIVI-NOT: getelementptr
    111 ; OPT-CIVI: load i8, i8* %in.gep
    112 
    113 ; OPT-GFX9: br
    114 ; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %in, i64 4095
    115 ; OPT-GFX9: load i8, i8* %sunkaddr
    116 
    117 ; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset:
    118 ; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
    119 ; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
    120 define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32* %out, i8* %in) #1 {
        ; Fixture: conditionally loads one byte from %in + 4095, sign-extends it
        ; to i32 and stores the result (or 0 when the load is skipped) to
        ; %out + 1024 elements. Per the test name, 4095 is the largest offset
        ; case; the llc checks expect offset:4095 on the gfx900 load and no
        ; immediate offset on bonaire/tonga.
    121 entry:
    122   %out.gep = getelementptr i32, i32* %out, i32 1024
    123   %in.gep = getelementptr i8, i8* %in, i64 4095
          ; Branch condition comes from the mbcnt.lo intrinsic rather than a
          ; kernel argument (presumably to make the branch per-lane -- TODO confirm).
    124   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    125   %tmp0 = icmp eq i32 %tid, 0
    126   br i1 %tmp0, label %endif, label %if
    127 
    128 if:
          ; Only use of %in.gep; the sext matches the flat_load_sbyte the llc checks expect.
    129   %tmp1 = load i8, i8* %in.gep
    130   %tmp2 = sext i8 %tmp1 to i32
    131   br label %endif
    132 
    133 endif:
          ; %x is the sign-extended byte when arriving from 'if', 0 when arriving from 'entry'.
    134   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
    135   store i32 %x, i32* %out.gep
    136   br label %done
    137 
    138 done:
    139   ret void
    140 }
    141 
    142 ; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
    143 ; OPT: %in.gep = getelementptr i8, i8* %in, i64 4096
    144 ; OPT: br
    145 ; OPT-NOT: getelementptr
    146 ; OPT: load i8, i8* %in.gep
    147 
    148 ; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset:
    149 ; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
    150 define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32* %out, i8* %in) #1 {
        ; Fixture: same shape as the 4095-offset test but with a byte offset of
        ; 4096, one more than that test uses. The checks before this function
        ; expect, on every target, that the GEP stays in 'entry' and that the
        ; load carries no immediate offset.
    151 entry:
    152   %out.gep = getelementptr i32, i32* %out, i64 99999
    153   %in.gep = getelementptr i8, i8* %in, i64 4096
          ; Branch condition comes from the mbcnt.lo intrinsic rather than a
          ; kernel argument (presumably to make the branch per-lane -- TODO confirm).
    154   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    155   %tmp0 = icmp eq i32 %tid, 0
    156   br i1 %tmp0, label %endif, label %if
    157 
    158 if:
          ; Only use of %in.gep.
    159   %tmp1 = load i8, i8* %in.gep
    160   %tmp2 = sext i8 %tmp1 to i32
    161   br label %endif
    162 
    163 endif:
          ; %x is the sign-extended byte when arriving from 'if', 0 when arriving from 'entry'.
    164   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
    165   store i32 %x, i32* %out.gep
    166   br label %done
    167 
    168 done:
    169   ret void
    170 }
    171 
    172 ; OPT-LABEL: @test_no_sink_flat_reg_offset(
    173 ; OPT: %in.gep = getelementptr i8, i8* %in, i64 %reg
    174 ; OPT: br
    175 
    176 ; OPT-NOT: getelementptr
    177 ; OPT: load i8, i8* %in.gep
    178 
    179 ; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset:
    180 ; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
    181 define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32* %out, i8* %in, i64 %reg) #1 {
        ; Fixture: same conditional byte-load shape as the constant-offset tests,
        ; but the byte offset is the runtime value %reg instead of a constant.
        ; The checks before this function expect the GEP to remain in 'entry'
        ; and the load to carry no immediate offset.
    182 entry:
    183   %out.gep = getelementptr i32, i32* %out, i32 1024
          ; Variable (non-constant) offset.
    184   %in.gep = getelementptr i8, i8* %in, i64 %reg
          ; Branch condition comes from the mbcnt.lo intrinsic rather than a
          ; kernel argument (presumably to make the branch per-lane -- TODO confirm).
    185   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    186   %tmp0 = icmp eq i32 %tid, 0
    187   br i1 %tmp0, label %endif, label %if
    188 
    189 if:
          ; Only use of %in.gep.
    190   %tmp1 = load i8, i8* %in.gep
    191   %tmp2 = sext i8 %tmp1 to i32
    192   br label %endif
    193 
    194 endif:
          ; %x is the sign-extended byte when arriving from 'if', 0 when arriving from 'entry'.
    195   %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
    196   store i32 %x, i32* %out.gep
    197   br label %done
    198 
    199 done:
    200   ret void
    201 }
    202 
        ; Intrinsic called by the tests above to produce their branch-condition value.
    203 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
    204 
        ; #0 is applied to the intrinsic declaration and its call sites.
    205 attributes #0 = { nounwind readnone }
        ; #1 is applied to the kernels that call the intrinsic.
    206 attributes #1 = { nounwind }
        ; #2 is not referenced anywhere in the visible part of this file.
    207 attributes #2 = { nounwind argmemonly }
    208