Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
      2 
      3 
      4 ; CHECK-LABEL: @branch_ptr_var_same_alloca(
      5 ; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @branch_ptr_var_same_alloca.alloca, i32 0, i32 %{{[0-9]+}}
      6 ; Both phi inputs below are GEPs into the same alloca; the expectations require every pointer to become addrspace(3).
      7 ; CHECK: if:
      8 ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
      9 ; The unnamed base pointer is matched by pattern in both arms so the test does not depend on exact value numbering.
     10 ; CHECK: else:
     11 ; CHECK: %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
     12 
     13 ; CHECK: endif:
     14 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
     15 ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
     16 define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
     17 entry:
     18   %alloca = alloca [64 x i32], align 4
     19   br i1 undef, label %if, label %else
     20 ; Each arm indexes the same alloca with a different kernel argument.
     21 if:
     22   %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
     23   br label %endif
     24 
     25 else:
     26   %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
     27   br label %endif
     28 ; The store goes through a phi that merges the two GEPs.
     29 endif:
     30   %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
     31   store i32 0, i32* %phi.ptr, align 4
     32   ret void
     33 }
     34 
     35 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
     36 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
     37 define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
     38 entry:
     39   %alloca = alloca [64 x i32], align 4
     40   br i1 undef, label %if, label %endif
     41 ; Only the %if path derives a pointer from the alloca; the %entry edge feeds null into the phi.
     42 if:
     43   %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
     44   br label %endif
     45 ; The phi lists the alloca-derived GEP first and null second; the expectation above keeps that order and requires addrspace(3).
     46 endif:
     47   %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
     48   store i32 0, i32* %phi.ptr, align 4
     49   ret void
     50 }
     51 
     52 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
     53 ; CHECK: %phi.ptr = phi i32 addrspace(3)*  [ null, %entry ], [ %arrayidx0, %if ]
     54 define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
     55 entry:
     56   %alloca = alloca [64 x i32], align 4
     57   br i1 undef, label %if, label %endif
     58 ; Only the %if path derives a pointer from the alloca; the %entry edge feeds null into the phi.
     59 if:
     60   %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
     61   br label %endif
     62 ; Here null is the first phi operand and the alloca-derived GEP the second; the expectation above keeps that order and requires addrspace(3).
     63 endif:
     64   %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
     65   store i32 0, i32* %phi.ptr, align 4
     66   ret void
     67 }
     68 
     69 ; CHECK-LABEL: @one_phi_value(
     70 ; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %{{[0-9]+}}
     71 ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
     72 ; Unnamed values are matched by pattern so the test does not depend on exact value numbering.
     73 ; CHECK: br label %exit
     74 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
     75 ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
     76 define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
     77 entry:
     78   %alloca = alloca [64 x i32], align 4
     79   %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
     80   br label %exit
     81 ; The phi has a single incoming value, the GEP into the alloca; the expectations require it to become addrspace(3).
     82 exit:
     83   %phi.ptr = phi i32* [ %arrayidx0, %entry ]
     84   store i32 0, i32* %phi.ptr, align 4
     85   ret void
     86 }
     87 
     88 ; CHECK-LABEL: @branch_ptr_alloca_unknown_obj(
     89 ; CHECK: %alloca = alloca [64 x i32], align 4
     90 ; One phi input comes from a call to an external function, so the expectations require the alloca and all i32* types to stay unchanged.
     91 ; CHECK: if:
     92 ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
     93 
     94 ; CHECK: else:
     95 ; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()
     96 
     97 ; CHECK: endif:
     98 ; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
     99 ; CHECK: store i32 0, i32* %phi.ptr, align 4
    100 define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
    101 entry:
    102   %alloca = alloca [64 x i32], align 4
    103   br i1 undef, label %if, label %else
    104 ; This arm produces a pointer into the alloca.
    105 if:
    106   %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
    107   br label %endif
    108 ; This arm produces a pointer returned by a function that is only declared in this file.
    109 else:
    110   %arrayidx1 = call i32* @get_unknown_pointer()
    111   br label %endif
    112 ; The store goes through a phi that merges the alloca-derived pointer with the call result.
    113 endif:
    114   %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
    115   store i32 0, i32* %phi.ptr, align 4
    116   ret void
    117 }
    118 
    119 ; kernel void ptr_induction_var_same_alloca(void)
    120 ; {
    121 ;     int alloca[64];
    122 ;     int i = 0;
    123 ;
    124 ;     #pragma nounroll
    125 ;     for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i)
    126 ;     {
    127 ;         *p = i;
    128 ;     }
    129 ; }
    130 
    131 ; FIXME: This should be promotable. We need to use
    132 ; GetUnderlyingObjects when looking at the icmp user.
    133 ; Until then, the expectations below pin the current output: the alloca and the i32* phi are left unchanged.
    134 ; CHECK-LABEL: @ptr_induction_var_same_alloca(
    135 ; CHECK: %alloca = alloca [64 x i32], align 4
    136 ; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
    137 define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
    138 entry:
    139   %alloca = alloca [64 x i32], align 4
    140   %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2 ; loop start, &alloca[2] in the C sketch above
    141   %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48 ; loop end, &alloca[48] in the C sketch above
    142   br label %for.body
    143 
    144 for.cond.cleanup:                                 ; preds = %for.body
    145   ret void
    146 ; Loop body: %p.08 is the pointer induction variable, %i.09 the value stored through it.
    147 for.body:                                         ; preds = %for.body, %entry
    148   %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
    149   %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
    150   store i32 %i.09, i32* %p.08, align 4
    151   %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
    152   %inc = add nuw nsw i32 %i.09, 1
    153   %cmp = icmp eq i32* %incdec.ptr, %arrayidx1 ; both operands derive from %alloca; presumably the icmp user the FIXME refers to
    154   br i1 %cmp, label %for.cond.cleanup, label %for.body
    155 }
    156 
    157 
    158 ; extern int* get_unknown_pointer(void);
    159 
    160 ; kernel void ptr_induction_var_alloca_unknown(void)
    161 ; {
    162 ;     int alloca[64];
    163 ;     int i = 0;
    164 ;
    165 ;     for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i)
    166 ;     {
    167 ;         *p = i;
    168 ;     }
    169 ; }
    170 ; The loop end pointer comes from a call to an external function, so the expectations require the alloca, the i32* phi and the i32* compare to stay unchanged.
    171 ; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
    172 ; CHECK: %alloca = alloca [64 x i32], align 4
    173 ; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
    174 ; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
    175 define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
    176 entry:
    177   %alloca = alloca [64 x i32], align 4
    178   %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2 ; loop start, &alloca[2] in the C sketch above
    179   %call = tail call i32* @get_unknown_pointer() #2 ; NOTE(review): attribute group #2 is not defined in the visible part of this file - confirm it is intended
    180   %cmp.7 = icmp eq i32* %arrayidx, %call ; skip the loop entirely when start already equals end
    181   br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
    182 
    183 for.body.preheader:                               ; preds = %entry
    184   br label %for.body
    185 
    186 for.cond.cleanup.loopexit:                        ; preds = %for.body
    187   br label %for.cond.cleanup
    188 
    189 for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
    190   ret void
    191 ; Loop body: %p.08 is the pointer induction variable, %i.09 the value stored through it.
    192 for.body:                                         ; preds = %for.body, %for.body.preheader
    193   %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
    194   %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
    195   store i32 %i.09, i32* %p.08, align 4
    196   %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
    197   %inc = add nuw nsw i32 %i.09, 1
    198   %cmp = icmp eq i32* %incdec.ptr, %call ; compares the alloca-derived pointer against the call result
    199   br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
    200 }
    201 
    202 declare i32* @get_unknown_pointer() #0 ; declaration only, no body in this file; called by the "unknown" pointer tests
    203 ; Attribute group referenced by the declaration above and by every kernel definition visible in this file.
    204 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
    205