Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -check-prefix=IR %s
      2 ; RUN: llc -march=amdgcn -mcpu=fiji -disable-promote-alloca-to-vector < %s | FileCheck -check-prefix=ASM %s
      3 
      4 ; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
      5 ; IR: alloca [5 x i32]
      6 
      7 ; ASM-LABEL: {{^}}promote_alloca_shaders:
      8 ; ASM: ; ScratchSize: 24
      9 define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
     10 entry:
     11   %stack = alloca [5 x i32], align 4, addrspace(5)
     12   %tmp0 = load i32, i32 addrspace(1)* %in, align 4
     13   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0
     14   store i32 4, i32 addrspace(5)* %arrayidx1, align 4
     15   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
     16   %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
     17   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1
     18   store i32 5, i32 addrspace(5)* %arrayidx3, align 4
     19   %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
     20   %tmp2 = load i32, i32 addrspace(5)* %arrayidx4, align 4
     21   store i32 %tmp2, i32 addrspace(1)* %out, align 4
     22   %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
     23   %tmp3 = load i32, i32 addrspace(5)* %arrayidx5
     24   %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
     25   store i32 %tmp3, i32 addrspace(1)* %arrayidx6
     26   ret void
     27 }
     28 
     29 ; OPT-LABEL: @promote_to_vector_call_c(
     30 ; OPT-NOT: alloca
     31 ; OPT: extractelement <2 x i32> %{{[0-9]+}}, i32 %in
     32 
     33 ; ASM-LABEL: {{^}}promote_to_vector_call_c:
     34 ; ASM-NOT: LDSByteSize
     35 ; ASM: ; ScratchSize: 12
     36 define void @promote_to_vector_call_c(i32 addrspace(1)* %out, i32 %in) #0 {
     37 entry:
     38   %tmp = alloca [2 x i32], addrspace(5)
     39   %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
     40   %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
     41   store i32 0, i32 addrspace(5)* %tmp1
     42   store i32 1, i32 addrspace(5)* %tmp2
     43   %tmp3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
     44   %tmp4 = load i32, i32 addrspace(5)* %tmp3
     45   %tmp5 = load volatile i32, i32 addrspace(1)* undef
     46   %tmp6 = add i32 %tmp4, %tmp5
     47   store i32 %tmp6, i32 addrspace(1)* %out
     48   ret void
     49 }
     50 
     51 ; OPT-LABEL: @no_promote_to_lds_c(
     52 ; OPT: alloca
     53 
     54 ; ASM-LABEL: {{^}}no_promote_to_lds_c:
     55 ; ASM-NOT: LDSByteSize
     56 ; ASM: ; ScratchSize: 24
     57 define void @no_promote_to_lds_c(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
     58 entry:
     59   %stack = alloca [5 x i32], align 4, addrspace(5)
     60   %0 = load i32, i32 addrspace(1)* %in, align 4
     61   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
     62   store i32 4, i32 addrspace(5)* %arrayidx1, align 4
     63   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
     64   %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
     65   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
     66   store i32 5, i32 addrspace(5)* %arrayidx3, align 4
     67   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
     68   %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
     69   store i32 %2, i32 addrspace(1)* %out, align 4
     70   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
     71   %3 = load i32, i32 addrspace(5)* %arrayidx12
     72   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
     73   store i32 %3, i32 addrspace(1)* %arrayidx13
     74   ret void
     75 }
     76 
     77 declare i32 @foo(i32 addrspace(5)*) #0
     78 
     79 ; ASM-LABEL: {{^}}call_private:
     80 ; ASM: buffer_store_dword
     81 ; ASM: buffer_store_dword
     82 ; ASM: s_swappc_b64
     83 ; ASM: ScratchSize: 16400
     84 define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 {
     85 entry:
     86   %tmp = alloca [2 x i32], addrspace(5)
     87   %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
     88   %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
     89   store i32 0, i32 addrspace(5)* %tmp1
     90   store i32 1, i32 addrspace(5)* %tmp2
     91   %tmp3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
     92   %val = call i32 @foo(i32 addrspace(5)* %tmp3)
     93   store i32 %val, i32 addrspace(1)* %out
     94   ret void
     95 }
     96 
     97 declare i32 @llvm.amdgcn.workitem.id.x() #1
     98 
     99 attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
    100 attributes #1 = { nounwind readnone }
    101