Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
      2 ; RUN: opt -S -mtriple=r600-unknown-unknown -mcpu=redwood -amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
        ; The first command compiles this file with llc for the redwood CPU and
        ; verifies the assembly using the R600 and FUNC check prefixes.  The second
        ; runs opt with -amdgpu-promote-alloca and verifies the resulting IR using
        ; the OPT prefix.  No other check prefix is enabled by these two commands.
      3 
        ; Intrinsic called directly by @work_item_info.
      4 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
      5 
      6 ; FUNC-LABEL: {{^}}mova_same_clause:
        ; Two stores into a 5-element private i32 array at indices loaded from %in,
        ; followed by loads of elements 0 and 1 that are written to %out.  The R600
        ; lines below expect two LDS writes followed by two LDS reads in the llc
        ; output.
      7 
      8 ; R600: LDS_WRITE
      9 ; R600: LDS_WRITE
     10 ; R600: LDS_READ
     11 ; R600: LDS_READ
     12 
        ; The OPT lines expect calls to these five intrinsics, each annotated with
        ; !range !0, in the output of the opt invocation.
     13 ; OPT: call i32 @llvm.r600.read.local.size.y(), !range !0
     14 ; OPT: call i32 @llvm.r600.read.local.size.z(), !range !0
     15 ; OPT: call i32 @llvm.r600.read.tidig.x(), !range !0
     16 ; OPT: call i32 @llvm.r600.read.tidig.y(), !range !0
     17 ; OPT: call i32 @llvm.r600.read.tidig.z(), !range !0
     18 
     19 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
     20 entry:
     21   %stack = alloca [5 x i32], align 4
        ; stack[in[0]] = 4
     22   %0 = load i32, i32 addrspace(1)* %in, align 4
     23   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
     24   store i32 4, i32* %arrayidx1, align 4
        ; stack[in[1]] = 5
     25   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
     26   %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
     27   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
     28   store i32 5, i32* %arrayidx3, align 4
        ; out[0] = stack[0]
     29   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
     30   %2 = load i32, i32* %arrayidx10, align 4
     31   store i32 %2, i32 addrspace(1)* %out, align 4
        ; out[1] = stack[1]
     32   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
     33   %3 = load i32, i32* %arrayidx12
     34   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
     35   store i32 %3, i32 addrspace(1)* %arrayidx13
     36   ret void
     37 }
     38 
     39 ; This test checks that the stack offset is calculated correctly for structs.
     40 ; All register loads/stores should be optimized away, so there shouldn't be
     41 ; any MOVA instructions.
     42 ;
     43 ; XXX: This generated code has unnecessary MOVs, we should be able to optimize
     44 ; this.
     45 
     46 ; FUNC-LABEL: {{^}}multiple_structs:
     47 ; R600-NOT: MOVA_INT
        ; Two-field i32 struct used for both private allocations below.
     48 %struct.point = type { i32, i32 }
     49 
     50 define void @multiple_structs(i32 addrspace(1)* %out) #0 {
     51 entry:
     52   %a = alloca %struct.point
     53   %b = alloca %struct.point
        ; Field pointers: member 0 and member 1 of each struct.
     54   %a.x.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 0
     55   %a.y.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 1
     56   %b.x.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 0
     57   %b.y.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 1
        ; a = { 0, 1 }, b = { 2, 3 }
     58   store i32 0, i32* %a.x.ptr
     59   store i32 1, i32* %a.y.ptr
     60   store i32 2, i32* %b.x.ptr
     61   store i32 3, i32* %b.y.ptr
        ; Re-derive pointers to member 0 of each struct (constant indices only),
        ; load both and write their sum to %out.
     62   %a.indirect.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 0
     63   %b.indirect.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 0
     64   %a.indirect = load i32, i32* %a.indirect.ptr
     65   %b.indirect = load i32, i32* %b.indirect.ptr
     66   %0 = add i32 %a.indirect, %b.indirect
     67   store i32 %0, i32 addrspace(1)* %out
     68   ret void
     69 }
     70 
     71 ; Test direct access of a private array inside a loop.  The private array
     72 ; loads and stores should be lowered to copies, so there shouldn't be any
     73 ; MOVA instructions.
     74 
     75 ; FUNC-LABEL: {{^}}direct_loop:
     76 ; R600-NOT: MOVA_INT
     77 
     78 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
     79 entry:
     80   %prv_array_const = alloca [2 x i32]
     81   %prv_array = alloca [2 x i32]
        ; prv_array_const = { in[0], in[1] }
     82   %a = load i32, i32 addrspace(1)* %in
     83   %b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
     84   %b = load i32, i32 addrspace(1)* %b_src_ptr
     85   %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
     86   store i32 %a, i32* %a_dst_ptr
     87   %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
     88   store i32 %b, i32* %b_dst_ptr
     89   br label %for.body
     90 
        ; Each iteration does prv_array[0] += prv_array_const[0]; all array indices
        ; are constants.  The loop exits once the incremented counter equals 4095.
        ; Note that nothing stores to %prv_array before the first iteration's load.
     91 for.body:
     92   %inc = phi i32 [0, %entry], [%count, %for.body]
     93   %x_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
     94   %x = load i32, i32* %x_ptr
     95   %y_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
     96   %y = load i32, i32* %y_ptr
     97   %xy = add i32 %x, %y
     98   store i32 %xy, i32* %y_ptr
     99   %count = add i32 %inc, 1
    100   %done = icmp eq i32 %count, 4095
    101   br i1 %done, label %for.end, label %for.body
    102 
        ; out[0] = prv_array[0]
    103 for.end:
    104   %value_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
    105   %value = load i32, i32* %value_ptr
    106   store i32 %value, i32 addrspace(1)* %out
    107   ret void
    108 }
    109 
    110 ; FUNC-LABEL: {{^}}short_array:
        ; Private 2-element i16 array initialised to { 0, 1 } and then read at the
        ; runtime index %index; the llc output is expected to contain MOVA_INT.
    111 
    112 ; R600: MOVA_INT
    113 define void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
    114 entry:
    115   %0 = alloca [2 x i16]
    116   %1 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 0
    117   %2 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 1
    118   store i16 0, i16* %1
    119   store i16 1, i16* %2
        ; Dynamically indexed element, sign-extended to i32 before the global store.
    120   %3 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 %index
    121   %4 = load i16, i16* %3
    122   %5 = sext i16 %4 to i32
    123   store i32 %5, i32 addrspace(1)* %out
    124   ret void
    125 }
    126 
    127 ; FUNC-LABEL: {{^}}char_array:
        ; Private 2-element i8 array initialised to { 0, 1 } and then read at the
        ; runtime index %index; the llc output is expected to contain MOVA_INT.
    128 
    129 ; R600: MOVA_INT
    130 define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
    131 entry:
    132   %0 = alloca [2 x i8]
    133   %1 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 0
    134   %2 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 1
    135   store i8 0, i8* %1
    136   store i8 1, i8* %2
        ; Dynamically indexed element, sign-extended to i32 before the global store.
    137   %3 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 %index
    138   %4 = load i8, i8* %3
    139   %5 = sext i8 %4 to i32
    140   store i32 %5, i32 addrspace(1)* %out
    141   ret void
    142 
    143 }
    144 
    145 ; Make sure we don't overwrite workitem information with private memory
    146 
    147 ; FUNC-LABEL: {{^}}work_item_info:
    148 ; R600-NOT: MOV T0.X
    149 ; Additional check in case the move ends up in the last slot
    150 ; R600-NOT: MOV * T0.X
        ; Both patterns above name register T0 with the digit zero.  A pattern
        ; spelled with the letter O names no register, can never match, and
        ; therefore forbids nothing.
    151 define void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 {
    152 entry:
        ; Private array { 0, 1 } read at the runtime index %in.
    153   %0 = alloca [2 x i32]
    154   %1 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 0
    155   %2 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 1
    156   store i32 0, i32* %1
    157   store i32 1, i32* %2
    158   %3 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 %in
    159   %4 = load i32, i32* %3
        ; The x work-item id is read after the private accesses and added to the
        ; loaded element; the sum is written to %out.
    160   %5 = call i32 @llvm.r600.read.tidig.x()
    161   %6 = add i32 %4, %5
    162   store i32 %6, i32 addrspace(1)* %out
    163   ret void
    164 }
    165 
    166 ; Test that two stack objects are not stored in the same register
    167 ; The second stack object should be in T3.X
    168 ; FUNC-LABEL: {{^}}no_overlap:
        ; NOTE(review): R600_CHECK is not one of the prefixes enabled by the test
        ; invocations at the top of this file (R600, FUNC, OPT), so the two
        ; R600_CHECK lines below appear to be ignored by FileCheck.  That would
        ; leave CHAN undefined for the R600-NOT line that uses [[CHAN]].  Simply
        ; renaming the prefix would also make the trailing '+' a literal character
        ; in the pattern -- confirm the intended checks before enabling them.
    169 ; R600_CHECK: MOV
    170 ; R600_CHECK: [[CHAN:[XYZW]]]+
    171 ; R600-NOT: [[CHAN]]+
    172 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
    173 entry:
        ; Two separate private byte arrays: %0 has 3 elements, %1 has 2.
    174   %0 = alloca [3 x i8], align 1
    175   %1 = alloca [2 x i8], align 1
    176   %2 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 0
    177   %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 1
    178   %4 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 2
    179   %5 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 0
    180   %6 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 1
        ; first array = { 0, 1, 2 }, second array = { 1, 0 }
    181   store i8 0, i8* %2
    182   store i8 1, i8* %3
    183   store i8 2, i8* %4
    184   store i8 1, i8* %5
    185   store i8 0, i8* %6
        ; Read both arrays at the same runtime index %in, add the two bytes,
        ; sign-extend the sum and write it to %out.
    186   %7 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 %in
    187   %8 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 %in
    188   %9 = load i8, i8* %7
    189   %10 = load i8, i8* %8
    190   %11 = add i8 %9, %10
    191   %12 = sext i8 %11 to i32
    192   store i32 %12, i32 addrspace(1)* %out
    193   ret void
    194 }
    195 
    196 define void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
        ; Writes 0 and 1 into row 0 of a private 2x2 i8 array, reads row 0 at the
        ; runtime column %index and stores the sign-extended byte to %out.
    197 entry:
    198   %bytes = alloca [2 x [2 x i8]]
    199   %col0.ptr = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %bytes, i32 0, i32 0, i32 0
    200   %col1.ptr = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %bytes, i32 0, i32 0, i32 1
    201   store i8 0, i8* %col0.ptr
    202   store i8 1, i8* %col1.ptr
    203   %dyn.ptr = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %bytes, i32 0, i32 0, i32 %index
    204   %byte = load i8, i8* %dyn.ptr
    205   %wide = sext i8 %byte to i32
    206   store i32 %wide, i32 addrspace(1)* %out
    207   ret void
    208 }
    209 
    210 define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
        ; Writes 0 and 1 into row 0 of a private 2x2 i32 array, reads row 0 at the
        ; runtime column %index and stores the loaded word to %out.
    211 entry:
    212   %words = alloca [2 x [2 x i32]]
    213   %col0.ptr = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %words, i32 0, i32 0, i32 0
    214   %col1.ptr = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %words, i32 0, i32 0, i32 1
    215   store i32 0, i32* %col0.ptr
    216   store i32 1, i32* %col1.ptr
    217   %dyn.ptr = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %words, i32 0, i32 0, i32 %index
    218   %word = load i32, i32* %dyn.ptr
    219   store i32 %word, i32 addrspace(1)* %out
    220   ret void
    221 }
    222 
    223 define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
        ; Writes 0 and 1 into row 0 of a private 2x2 i64 array, reads row 0 at the
        ; runtime column %index and stores the loaded quadword to %out.
    224 entry:
    225   %quads = alloca [2 x [2 x i64]]
    226   %col0.ptr = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %quads, i32 0, i32 0, i32 0
    227   %col1.ptr = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %quads, i32 0, i32 0, i32 1
    228   store i64 0, i64* %col0.ptr
    229   store i64 1, i64* %col1.ptr
    230   %dyn.ptr = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %quads, i32 0, i32 0, i32 %index
    231   %quad = load i64, i64* %dyn.ptr
    232   store i64 %quad, i64 addrspace(1)* %out
    233   ret void
    234 }
    235 
    236 %struct.pair32 = type { i32, i32 }
    237 
    238 define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
        ; Private 2x2 array of two-field structs.  Member 1 of elements [0][0] and
        ; [0][1] is written with 0 and 1; the load then reads member 0 of element
        ; [0][%index] (a member that is not one of the two stored above) and the
        ; result is written to %out.
    239 entry:
    240   %pairs = alloca [2 x [2 x %struct.pair32]]
    241   %e0.second.ptr = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %pairs, i32 0, i32 0, i32 0, i32 1
    242   %e1.second.ptr = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %pairs, i32 0, i32 0, i32 1, i32 1
    243   store i32 0, i32* %e0.second.ptr
    244   store i32 1, i32* %e1.second.ptr
    245   %dyn.first.ptr = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %pairs, i32 0, i32 0, i32 %index, i32 0
    246   %first = load i32, i32* %dyn.first.ptr
    247   store i32 %first, i32 addrspace(1)* %out
    248   ret void
    249 }
    250 
    251 define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
        ; Private 2-element array of two-field structs.  Member 1 of element 0 is
        ; set to 0 and member 0 of element 1 is set to 1; member 0 of element
        ; %index is then loaded and written to %out.
    252 entry:
    253   %pairs = alloca [2 x %struct.pair32]
    254   %e0.second.ptr = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %pairs, i32 0, i32 0, i32 1
    255   %e1.first.ptr = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %pairs, i32 0, i32 1, i32 0
    256   store i32 0, i32* %e0.second.ptr
    257   store i32 1, i32* %e1.first.ptr
    258   %dyn.first.ptr = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %pairs, i32 0, i32 %index, i32 0
    259   %first = load i32, i32* %dyn.first.ptr
    260   store i32 %first, i32 addrspace(1)* %out
    261   ret void
    262 }
    263 
    264 define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
        ; Private array { 0, 1 }.  A select chooses the pointer to element 0 when
        ; %in is zero and the pointer to element 1 otherwise; the value loaded
        ; through the chosen pointer is written to %out.
    265 entry:
    266   %pair = alloca [2 x i32]
    267   %elt0.ptr = getelementptr inbounds [2 x i32], [2 x i32]* %pair, i32 0, i32 0
    268   %elt1.ptr = getelementptr inbounds [2 x i32], [2 x i32]* %pair, i32 0, i32 1
    269   store i32 0, i32* %elt0.ptr
    270   store i32 1, i32* %elt1.ptr
    271   %is.zero = icmp eq i32 %in, 0
    272   %chosen.ptr = select i1 %is.zero, i32* %elt0.ptr, i32* %elt1.ptr
    273   %picked = load i32, i32* %chosen.ptr
    274   store i32 %picked, i32 addrspace(1)* %out
    275   ret void
    276 }
    277 
    278 ; AMDGPUPromoteAlloca does not know how to handle ptrtoint.  When it
    279 ; finds one, it should stop trying to promote.
    280 
    281 ; FUNC-LABEL: {{^}}ptrtoint:
        ; The label above is anchored to the start of the line, like every other
        ; function label in this file.
        ; NOTE(review): the SI-prefixed lines below are not enabled by either test
        ; invocation at the top of this file (only R600, FUNC and OPT are), so
        ; FileCheck appears to ignore them here -- confirm whether they should be
        ; dropped or an invocation using that prefix added.
    282 ; SI-NOT: ds_write
    283 ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
    284 ; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
    285 define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
        ; alloca[%a] = 5
    286   %alloca = alloca [16 x i32]
    287   %tmp0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
    288   store i32 5, i32* %tmp0
        ; Round-trip the array address through an integer: convert to i32, add 5,
        ; convert back to a pointer, index it by %b, and load through the result.
    289   %tmp1 = ptrtoint [16 x i32]* %alloca to i32
    290   %tmp2 = add i32 %tmp1, 5
    291   %tmp3 = inttoptr i32 %tmp2 to i32*
    292   %tmp4 = getelementptr inbounds i32, i32* %tmp3, i32 %b
    293   %tmp5 = load i32, i32* %tmp4
    294   store i32 %tmp5, i32 addrspace(1)* %out
    295   ret void
    296 }
    297 
    298 ; OPT: !0 = !{i32 0, i32 2048}
        ; The line above expects the opt output to define metadata node !0 as the
        ; pair (0, 2048); the !range !0 annotations checked earlier in this file
        ; refer to that node.
    299 
        ; Attribute group #0, used by every function defined in this file except
        ; @select_private (which spells out nounwind directly).
    300 attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" }
    301