Home | History | Annotate | Download | only in R600
      1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
      2 ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
      3 
      4 declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; intrinsic declaration; its only caller in this file is @work_item_info
      5 
      6 ; FUNC-LABEL: @mova_same_clause
      7 
      8 ; R600-CHECK: LDS_WRITE
      9 ; R600-CHECK: LDS_WRITE
     10 ; R600-CHECK: LDS_READ
     11 ; R600-CHECK: LDS_READ
     12 
     13 ; SI-CHECK: DS_WRITE_B32
     14 ; SI-CHECK: DS_WRITE_B32
     15 ; SI-CHECK: DS_READ_B32
     16 ; SI-CHECK: DS_READ_B32
     17 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
        ; Stores two constants into a five-element stack array at indices read from
        ; %in, then copies elements 0 and 1 of that array to %out.
     18 entry:
     19   %stack = alloca [5 x i32], align 4
          ; stack[in[0]] = 4 -- the index is only known at run time.
     20   %0 = load i32 addrspace(1)* %in, align 4
     21   %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
     22   store i32 4, i32* %arrayidx1, align 4
          ; stack[in[1]] = 5 -- second run-time-indexed store.
     23   %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
     24   %1 = load i32 addrspace(1)* %arrayidx2, align 4
     25   %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
     26   store i32 5, i32* %arrayidx3, align 4
          ; out[0] = stack[0] -- constant index.
     27   %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
     28   %2 = load i32* %arrayidx10, align 4
     29   store i32 %2, i32 addrspace(1)* %out, align 4
          ; out[1] = stack[1] -- constant index.
     30   %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
     31   %3 = load i32* %arrayidx12
     32   %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
     33   store i32 %3, i32 addrspace(1)* %arrayidx13
     34   ret void
     35 }
     36 
     37 ; This test checks that the stack offset is calculated correctly for structs.
     38 ; All register loads/stores should be optimized away, so there shouldn't be
     39 ; any MOVA instructions.
     40 ;
     41 ; XXX: The generated code has unnecessary MOVs; we should be able to optimize
     42 ; them away.
     43 
     44 ; FUNC-LABEL: @multiple_structs
     45 ; R600-CHECK-NOT: MOVA_INT
     46 ; SI-CHECK-NOT: V_MOVREL
     47 %struct.point = type { i32, i32 } ; two i32 fields; named x and y by the GEP value names below
     48 
     49 define void @multiple_structs(i32 addrspace(1)* %out) {
        ; Fills two stack-allocated %struct.point objects with the constants 0..3,
        ; reloads field 0 of each and writes their sum to %out. Every GEP index in
        ; this function is a constant.
     50 entry:
     51   %a = alloca %struct.point
     52   %b = alloca %struct.point
          ; Field pointers: index 0 is x, index 1 is y.
     53   %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
     54   %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
     55   %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
     56   %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
          ; a = {0, 1}, b = {2, 3}
     57   store i32 0, i32* %a.x.ptr
     58   store i32 1, i32* %a.y.ptr
     59   store i32 2, i32* %b.x.ptr
     60   store i32 3, i32* %b.y.ptr
          ; Recompute pointers to field 0 of each struct (same indices as the .x.ptr
          ; values above) and load through them.
     61   %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
     62   %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
     63   %a.indirect = load i32* %a.indirect.ptr
     64   %b.indirect = load i32* %b.indirect.ptr
          ; out[0] = a.x + b.x
     65   %0 = add i32 %a.indirect, %b.indirect
     66   store i32 %0, i32 addrspace(1)* %out
     67   ret void
     68 }
     69 
     70 ; Test direct access of a private array inside a loop.  The private array
     71 ; loads and stores should be lowered to copies, so there shouldn't be any
     72 ; MOVA instructions.
     73 
     74 ; FUNC-LABEL: @direct_loop
     75 ; R600-CHECK-NOT: MOVA_INT
     76 ; SI-CHECK-NOT: V_MOVREL
     77 
     78 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
        ; Copies in[0] and in[1] into one stack array, then runs a counted loop that
        ; accumulates element 0 of that array into element 0 of a second stack array,
        ; and finally writes the accumulated element to %out. All array indices are
        ; constants.
     79 entry:
     80   %prv_array_const = alloca [2 x i32]
     81   %prv_array = alloca [2 x i32]
          ; prv_array_const = { in[0], in[1] }
     82   %a = load i32 addrspace(1)* %in
     83   %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
     84   %b = load i32 addrspace(1)* %b_src_ptr
     85   %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
     86   store i32 %a, i32* %a_dst_ptr
     87   %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
     88   store i32 %b, i32* %b_dst_ptr
     89   br label %for.body
     90 
     91 for.body:
          ; %inc starts at 0 and takes %count on the back edge.
     92   %inc = phi i32 [0, %entry], [%count, %for.body]
     93   %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
     94   %x = load i32* %x_ptr
          ; NOTE(review): nothing stores to %prv_array before this load, so the first
          ; iteration reads an uninitialized element -- TODO confirm this is intended.
     95   %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
     96   %y = load i32* %y_ptr
          ; prv_array[0] += prv_array_const[0]
     97   %xy = add i32 %x, %y
     98   store i32 %xy, i32* %y_ptr
          ; Leave the loop once the incremented counter equals 4095.
     99   %count = add i32 %inc, 1
    100   %done = icmp eq i32 %count, 4095
    101   br i1 %done, label %for.end, label %for.body
    102 
    103 for.end:
          ; out[0] = prv_array[0]
    104   %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
    105   %value = load i32* %value_ptr
    106   store i32 %value, i32 addrspace(1)* %out
    107   ret void
    108 }
    109 
    110 ; FUNC-LABEL: @short_array
    111 
    112 ; R600-CHECK: MOVA_INT
    113 
    114 ; SI-CHECK: V_MOVRELS_B32_e32
    115 define void @short_array(i32 addrspace(1)* %out, i32 %index) {
        ; Initializes a two-element i16 stack array to {0, 1}, loads the element
        ; selected by the run-time argument %index, sign-extends it to i32 and writes
        ; it to %out.
    116 entry:
    117   %0 = alloca [2 x i16]
    118   %1 = getelementptr [2 x i16]* %0, i32 0, i32 0
    119   %2 = getelementptr [2 x i16]* %0, i32 0, i32 1
    120   store i16 0, i16* %1
    121   store i16 1, i16* %2
          ; Dynamically indexed load: %index is a function argument.
    122   %3 = getelementptr [2 x i16]* %0, i32 0, i32 %index
    123   %4 = load i16* %3
    124   %5 = sext i16 %4 to i32
    125   store i32 %5, i32 addrspace(1)* %out
    126   ret void
    127 }
    128 
    129 ; FUNC-LABEL: @char_array
    130 
    131 ; R600-CHECK: MOVA_INT
    132 
    133 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100
    134 ; SI-CHECK: V_MOVRELS_B32_e32
    135 define void @char_array(i32 addrspace(1)* %out, i32 %index) {
        ; Initializes a two-element i8 stack array to {0, 1}, loads the element
        ; selected by the run-time argument %index, sign-extends it to i32 and writes
        ; it to %out.
    136 entry:
    137   %0 = alloca [2 x i8]
    138   %1 = getelementptr [2 x i8]* %0, i32 0, i32 0
    139   %2 = getelementptr [2 x i8]* %0, i32 0, i32 1
    140   store i8 0, i8* %1
    141   store i8 1, i8* %2
          ; Dynamically indexed load: %index is a function argument.
    142   %3 = getelementptr [2 x i8]* %0, i32 0, i32 %index
    143   %4 = load i8* %3
    144   %5 = sext i8 %4 to i32
    145   store i32 %5, i32 addrspace(1)* %out
    146   ret void
    147 
    148 }
    149 
    150 ; Make sure we don't overwrite workitem information with private memory
    151 
    152 ; FUNC-LABEL: @work_item_info
    153 ; R600-CHECK-NOT: MOV T0.X
    154 ; Additional check in case the move ends up in the last slot
    155 ; R600-CHECK-NOT: MOV * T0.X
    156 
    157 ; SI-CHECK-NOT: V_MOV_B32_e{{(32|64)}} v0
    158 define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
        ; Initializes a two-element i32 stack array to {0, 1}, loads the element
        ; selected by the run-time argument %in, adds the value returned by
        ; @llvm.r600.read.tidig.x and writes the sum to %out.
    159 entry:
    160   %0 = alloca [2 x i32]
    161   %1 = getelementptr [2 x i32]* %0, i32 0, i32 0
    162   %2 = getelementptr [2 x i32]* %0, i32 0, i32 1
    163   store i32 0, i32* %1
    164   store i32 1, i32* %2
          ; Dynamically indexed load from the stack array.
    165   %3 = getelementptr [2 x i32]* %0, i32 0, i32 %in
    166   %4 = load i32* %3
          ; The intrinsic result is used after the stack accesses above, so it has to
          ; still be available at this point.
    167   %5 = call i32 @llvm.r600.read.tidig.x()
    168   %6 = add i32 %4, %5
    169   store i32 %6, i32 addrspace(1)* %out
    170   ret void
    171 }
    172 
    173 ; Test that two stack objects are not stored in the same register
    174 ; The second stack object should be in T3.X
    175 ; FUNC-LABEL: @no_overlap
    176 ; R600_CHECK: MOV
    177 ; R600_CHECK: [[CHAN:[XYZW]]]+
        ; NOTE(review): the two directives above are spelled with an underscore, while
        ; the RUN line registers the prefix with a hyphen, so FileCheck does not treat
        ; them as directives and CHAN is never captured before the -NOT line below uses
        ; it. Simply correcting the spelling would also change what the test asserts,
        ; so this is flagged rather than changed -- TODO confirm the intended check.
    178 ; R600-CHECK-NOT: [[CHAN]]+
    179 ; SI-CHECK: V_MOV_B32_e32 v3
    180 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
        ; Allocates a three-byte and a two-byte stack array, fills both with
        ; constants, loads one element from each at the same run-time index %in, adds
        ; the two bytes, sign-extends the sum to i32 and writes it to %out.
    181 entry:
    182   %0 = alloca [3 x i8], align 1
    183   %1 = alloca [2 x i8], align 1
          ; Constant-index element pointers: %2..%4 into the first array, %5..%6 into
          ; the second.
    184   %2 = getelementptr [3 x i8]* %0, i32 0, i32 0
    185   %3 = getelementptr [3 x i8]* %0, i32 0, i32 1
    186   %4 = getelementptr [3 x i8]* %0, i32 0, i32 2
    187   %5 = getelementptr [2 x i8]* %1, i32 0, i32 0
    188   %6 = getelementptr [2 x i8]* %1, i32 0, i32 1
          ; first = {0, 1, 2}, second = {1, 0}
    189   store i8 0, i8* %2
    190   store i8 1, i8* %3
    191   store i8 2, i8* %4
    192   store i8 1, i8* %5
    193   store i8 0, i8* %6
          ; Both arrays are read at the same dynamic index.
    194   %7 = getelementptr [3 x i8]* %0, i32 0, i32 %in
    195   %8 = getelementptr [2 x i8]* %1, i32 0, i32 %in
    196   %9 = load i8* %7
    197   %10 = load i8* %8
    198   %11 = add i8 %9, %10
    199   %12 = sext i8 %11 to i32
    200   store i32 %12, i32 addrspace(1)* %out
    201   ret void
    202 }
    203 
    204 define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
        ; Nested-array variant with i8 elements: writes {0, 1} into row 0 of a 2x2
        ; stack array, loads row 0 at the run-time column %index, sign-extends the
        ; byte to i32 and writes it to %out.
        ; No FUNC-LABEL or CHECK directive is attached to this function in this file.
    205 entry:
    206   %alloca = alloca [2 x [2 x i8]]
    207   %gep0 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
    208   %gep1 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
    209   store i8 0, i8* %gep0
    210   store i8 1, i8* %gep1
          ; Outer index is the constant 0; only the inner index is dynamic.
    211   %gep2 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
    212   %load = load i8* %gep2
    213   %sext = sext i8 %load to i32
    214   store i32 %sext, i32 addrspace(1)* %out
    215   ret void
    216 }
    217 
    218 define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
        ; Nested-array variant with i32 elements: writes {0, 1} into row 0 of a 2x2
        ; stack array, loads row 0 at the run-time column %index and writes the value
        ; to %out.
        ; No FUNC-LABEL or CHECK directive is attached to this function in this file.
    219 entry:
    220   %alloca = alloca [2 x [2 x i32]]
    221   %gep0 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
    222   %gep1 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
    223   store i32 0, i32* %gep0
    224   store i32 1, i32* %gep1
          ; Outer index is the constant 0; only the inner index is dynamic.
    225   %gep2 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
    226   %load = load i32* %gep2
    227   store i32 %load, i32 addrspace(1)* %out
    228   ret void
    229 }
    230 
    231 define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
        ; Nested-array variant with i64 elements: writes {0, 1} into row 0 of a 2x2
        ; stack array, loads row 0 at the run-time column %index and writes the
        ; 64-bit value to %out.
        ; No FUNC-LABEL or CHECK directive is attached to this function in this file.
    232 entry:
    233   %alloca = alloca [2 x [2 x i64]]
    234   %gep0 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
    235   %gep1 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
    236   store i64 0, i64* %gep0
    237   store i64 1, i64* %gep1
          ; Outer index is the constant 0; only the inner index is dynamic.
    238   %gep2 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
    239   %load = load i64* %gep2
    240   store i64 %load, i64 addrspace(1)* %out
    241   ret void
    242 }
    243 
    244 %struct.pair32 = type { i32, i32 } ; pair of i32 fields; also used by @struct_pair32_array
    245 
    246 define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
        ; 2x2 stack array of %struct.pair32: stores constants into field 1 of
        ; elements [0][0] and [0][1], then loads field 0 of element [0][%index] and
        ; writes it to %out.
        ; No FUNC-LABEL or CHECK directive is attached to this function in this file.
    247 entry:
    248   %alloca = alloca [2 x [2 x %struct.pair32]]
          ; GEP indices after the leading 0 are: outer element, inner element, field.
    249   %gep0 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
    250   %gep1 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
    251   store i32 0, i32* %gep0
    252   store i32 1, i32* %gep1
          ; NOTE(review): the stores above write field 1, but this load reads field 0,
          ; which is never stored in this function -- TODO confirm this is intended.
    253   %gep2 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
    254   %load = load i32* %gep2
    255   store i32 %load, i32 addrspace(1)* %out
    256   ret void
    257 }
    258 
    259 define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
        ; Two-element stack array of %struct.pair32: stores 0 into field 1 of element
        ; 0 and 1 into field 0 of element 1, then loads field 0 of element %index and
        ; writes it to %out.
        ; No FUNC-LABEL or CHECK directive is attached to this function in this file.
    260 entry:
    261   %alloca = alloca [2 x %struct.pair32]
          ; GEP indices after the leading 0 are: array element, field.
    262   %gep0 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
    263   %gep1 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
    264   store i32 0, i32* %gep0
    265   store i32 1, i32* %gep1
          ; Field 0 of the dynamically selected element; of the two stores above only
          ; %gep1 targets a field 0 (that of element 1).
    266   %gep2 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
    267   %load = load i32* %gep2
    268   store i32 %load, i32 addrspace(1)* %out
    269   ret void
    270 }
    271 
    272 define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
        ; Initializes a two-element i32 stack array to {0, 1}, picks a pointer to
        ; element 0 or element 1 with a select on %in, loads through the selected
        ; pointer and writes the value to %out.
        ; No FUNC-LABEL or CHECK directive is attached to this function in this file.
    273 entry:
    274   %tmp = alloca [2 x i32]
    275   %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
    276   %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
    277   store i32 0, i32* %tmp1
    278   store i32 1, i32* %tmp2
          ; %in == 0 selects element 0, anything else selects element 1.
    279   %cmp = icmp eq i32 %in, 0
    280   %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
    281   %load = load i32* %sel
    282   store i32 %load, i32 addrspace(1)* %out
    283   ret void
    284 }
    285 
    286