Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      2 
      3 @local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
      4 
      5 ; Check that the LDS size emitted correctly
      6 ; EG: .long 166120
      7 ; EG-NEXT: .long 128
      8 
      9 ; FUNC-LABEL: {{^}}local_memory:
     10 
     11 ; EG: LDS_WRITE
     12 
     13 ; GROUP_BARRIER must be the last instruction in a clause
     14 ; EG: GROUP_BARRIER
     15 ; EG-NEXT: ALU clause
     16 
     17 ; EG: LDS_READ_RET
     18 define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out) #0 {
     19 entry:
     20   %y.i = call i32 @llvm.r600.read.tidig.x() #1
     21   %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
     22   store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
     23   %add = add nsw i32 %y.i, 1
     24   %cmp = icmp eq i32 %add, 16
     25   %.add = select i1 %cmp, i32 0, i32 %add
     26   call void @llvm.r600.group.barrier()
     27   %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
     28   %tmp = load i32, i32 addrspace(3)* %arrayidx1, align 4
     29   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
     30   store i32 %tmp, i32 addrspace(1)* %arrayidx2, align 4
     31   ret void
     32 }
     33 
     34 @local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
     35 @local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
     36 
     37 ; Check that the LDS size emitted correctly
     38 ; EG: .long 166120
     39 ; EG-NEXT: .long 8
     40 ; GCN: .long 47180
     41 ; GCN-NEXT: .long 32900
     42 
     43 ; FUNC-LABEL: {{^}}local_memory_two_objects:
     44 
     45 ; We would like to check the lds writes are using different
     46 ; addresses, but due to variations in the scheduler, we can't do
     47 ; this consistently on evergreen GPUs.
     48 ; EG: LDS_WRITE
     49 ; EG: LDS_WRITE
     50 
     51 ; GROUP_BARRIER must be the last instruction in a clause
     52 ; EG: GROUP_BARRIER
     53 ; EG-NEXT: ALU clause
     54 
     55 ; Make sure the lds reads are using different addresses, at different
     56 ; constant offsets.
     57 ; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
     58 ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
     59 
     60 define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
     61 entry:
     62   %x.i = call i32 @llvm.r600.read.tidig.x() #1
     63   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
     64   store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
     65   %mul = shl nsw i32 %x.i, 1
     66   %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
     67   store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
     68   %sub = sub nsw i32 3, %x.i
     69   call void @llvm.r600.group.barrier()
     70   %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
     71   %tmp = load i32, i32 addrspace(3)* %arrayidx2, align 4
     72   %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
     73   store i32 %tmp, i32 addrspace(1)* %arrayidx3, align 4
     74   %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
     75   %tmp1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
     76   %add = add nsw i32 %x.i, 4
     77   %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
     78   store i32 %tmp1, i32 addrspace(1)* %arrayidx5, align 4
     79   ret void
     80 }
     81 
     82 declare i32 @llvm.r600.read.tidig.x() #1
     83 declare void @llvm.r600.group.barrier() #2
     84 
     85 attributes #0 = { nounwind }
     86 attributes #1 = { nounwind readnone }
     87 attributes #2 = { convergent nounwind }
     88