Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck %s
      2 
      3 ; CHECK: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4
      4 
      5 define void @promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { ; input for the -amdgpu-promote-alloca RUN line; uses attribute group #0
      6 entry: ; expected (CHECK line above): %stack becomes a [63 x [5 x i32]] addrspace(3) global
      7   %stack = alloca [5 x i32], align 4 ; the alloca under test: a 5-element i32 array
      8   %0 = load i32, i32 addrspace(1)* %in, align 4 ; %0 = in[0], used as an array index below
      9   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 ; &stack[%0] - index is a loaded value, not a constant
     10   store i32 4, i32* %arrayidx1, align 4 ; stack[%0] = 4
     11   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 ; &in[1]
     12   %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 ; %1 = in[1], second array index
     13   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 ; &stack[%1]
     14   store i32 5, i32* %arrayidx3, align 4 ; stack[%1] = 5
     15   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 ; &stack[0]
     16   %2 = load i32, i32* %arrayidx10, align 4 ; read back stack[0]
     17   store i32 %2, i32 addrspace(1)* %out, align 4 ; out[0] = stack[0]
     18   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 ; &stack[1]
     19   %3 = load i32, i32* %arrayidx12 ; read back stack[1]
     20   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 ; &out[1]
     21   store i32 %3, i32 addrspace(1)* %arrayidx13 ; out[1] = stack[1]
     22   ret void
     23 }
     24 
     25 ; CHECK: @promote_alloca_size_256.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] undef, align 4
     26 
     27 define void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 { ; input for the -amdgpu-promote-alloca RUN line; uses attribute group #1
     28 entry: ; expected (CHECK line above): %stack becomes a [256 x [5 x i32]] addrspace(3) global
     29   %stack = alloca [5 x i32], align 4 ; the alloca under test: a 5-element i32 array
     30   %0 = load i32, i32 addrspace(1)* %in, align 4 ; %0 = in[0], used as an array index below
     31   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 ; &stack[%0] - index is a loaded value, not a constant
     32   store i32 4, i32* %arrayidx1, align 4 ; stack[%0] = 4
     33   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 ; &in[1]
     34   %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 ; %1 = in[1], second array index
     35   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 ; &stack[%1]
     36   store i32 5, i32* %arrayidx3, align 4 ; stack[%1] = 5
     37   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 ; &stack[0]
     38   %2 = load i32, i32* %arrayidx10, align 4 ; read back stack[0]
     39   store i32 %2, i32 addrspace(1)* %out, align 4 ; out[0] = stack[0]
     40   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 ; &stack[1]
     41   %3 = load i32, i32* %arrayidx12 ; read back stack[1]
     42   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 ; &out[1]
     43   store i32 %3, i32 addrspace(1)* %arrayidx13 ; out[1] = stack[1]
     44   ret void
     45 }
     46 
     47 ; CHECK: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
     48 
     49 define void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 { ; input for the -amdgpu-promote-alloca RUN line; uses attribute group #2
     50 entry: ; expected (CHECK line above): %stack becomes a [1600 x [5 x i32]] addrspace(3) global
     51   %stack = alloca [5 x i32], align 4 ; the alloca under test: a 5-element i32 array
     52   %0 = load i32, i32 addrspace(1)* %in, align 4 ; %0 = in[0], used as an array index below
     53   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 ; &stack[%0] - index is a loaded value, not a constant
     54   store i32 4, i32* %arrayidx1, align 4 ; stack[%0] = 4
     55   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 ; &in[1]
     56   %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 ; %1 = in[1], second array index
     57   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 ; &stack[%1]
     58   store i32 5, i32* %arrayidx3, align 4 ; stack[%1] = 5
     59   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 ; &stack[0]
     60   %2 = load i32, i32* %arrayidx10, align 4 ; read back stack[0]
     61   store i32 %2, i32 addrspace(1)* %out, align 4 ; out[0] = stack[0]
     62   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 ; &stack[1]
     63   %3 = load i32, i32* %arrayidx12 ; read back stack[1]
     64   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 ; &out[1]
     65   store i32 %3, i32 addrspace(1)* %arrayidx13 ; out[1] = stack[1]
     66   ret void
     67 }
     68 
     69 ; CHECK: @occupancy_0(
     70 ; CHECK: alloca [5 x i32]
     71 define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 { ; input for the -amdgpu-promote-alloca RUN line; uses attribute group #3
     72 entry: ; expected (CHECK lines above): an `alloca [5 x i32]` is still present after the pass
     73   %stack = alloca [5 x i32], align 4 ; the alloca under test: a 5-element i32 array
     74   %0 = load i32, i32 addrspace(1)* %in, align 4 ; %0 = in[0], used as an array index below
     75   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 ; &stack[%0] - index is a loaded value, not a constant
     76   store i32 4, i32* %arrayidx1, align 4 ; stack[%0] = 4
     77   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 ; &in[1]
     78   %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 ; %1 = in[1], second array index
     79   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 ; &stack[%1]
     80   store i32 5, i32* %arrayidx3, align 4 ; stack[%1] = 5
     81   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 ; &stack[0]
     82   %2 = load i32, i32* %arrayidx10, align 4 ; read back stack[0]
     83   store i32 %2, i32 addrspace(1)* %out, align 4 ; out[0] = stack[0]
     84   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 ; &stack[1]
     85   %3 = load i32, i32* %arrayidx12 ; read back stack[1]
     86   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 ; &out[1]
     87   store i32 %3, i32 addrspace(1)* %arrayidx13 ; out[1] = stack[1]
     88   ret void
     89 }
     90 
     91 ; CHECK: @occupancy_max(
     92 ; CHECK: alloca [5 x i32]
     93 define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 { ; input for the -amdgpu-promote-alloca RUN line; uses attribute group #4
     94 entry: ; expected (CHECK lines above): an `alloca [5 x i32]` is still present after the pass
     95   %stack = alloca [5 x i32], align 4 ; the alloca under test: a 5-element i32 array
     96   %0 = load i32, i32 addrspace(1)* %in, align 4 ; %0 = in[0], used as an array index below
     97   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 ; &stack[%0] - index is a loaded value, not a constant
     98   store i32 4, i32* %arrayidx1, align 4 ; stack[%0] = 4
     99   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 ; &in[1]
    100   %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 ; %1 = in[1], second array index
    101   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 ; &stack[%1]
    102   store i32 5, i32* %arrayidx3, align 4 ; stack[%1] = 5
    103   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 ; &stack[0]
    104   %2 = load i32, i32* %arrayidx10, align 4 ; read back stack[0]
    105   store i32 %2, i32 addrspace(1)* %out, align 4 ; out[0] = stack[0]
    106   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 ; &stack[1]
    107   %3 = load i32, i32* %arrayidx12 ; read back stack[1]
    108   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 ; &out[1]
    109   store i32 %3, i32 addrspace(1)* %arrayidx13 ; out[1] = stack[1]
    110   ret void
    111 }
    112 
    113 attributes #0 = { nounwind "amdgpu-max-work-group-size"="63" } ; used by @promote_alloca_size_63; only the work-group size is set
    114 attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="3" "amdgpu-max-work-group-size"="256" } ; used by @promote_alloca_size_256
    115 attributes #2 = { nounwind "amdgpu-max-waves-per-eu"="1" "amdgpu-max-work-group-size"="1600" } ; used by @promote_alloca_size_1600
    116 attributes #3 = { nounwind "amdgpu-max-waves-per-eu"="0" } ; used by @occupancy_0, whose CHECK lines expect the alloca to remain
    117 attributes #4 = { nounwind "amdgpu-max-waves-per-eu"="-1" } ; used by @occupancy_max, whose CHECK lines expect the alloca to remain
    118