Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
      2 
      3 ; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
      4 ; CHECK: %alloca = alloca i32
      5 ; CHECK: select i1 undef, i32* undef, i32* %alloca
        ; Selects between an undef pointer and the alloca itself, then stores
        ; through the selected pointer. The FileCheck expectations above require
        ; the alloca and the select to appear unchanged in the output, i.e. the
        ; alloca is expected to stay a stack alloca in this case.
      6 define void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
      7   %alloca = alloca i32, align 4
      8   %select = select i1 undef, i32* undef, i32* %alloca
      9   store i32 0, i32* %select, align 4
     10   ret void
     11 }
     12 
     13 ; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
     14 ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
     15 ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
     16 ; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
     17 ; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
     18 ; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
        ; Both select operands are GEPs into the same [16 x i32] alloca, indexed
        ; by the arguments %a and %b. The expectations above require the alloca to
        ; be replaced by an addrspace(3) global named after the function, with
        ; both GEPs, the select and the store rewritten to addrspace(3) pointers
        ; that share one base GEP into that global.
     19 define void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
     20   %alloca = alloca [16 x i32], align 4
     21   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
     22   %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
     23   %select = select i1 undef, i32* %ptr0, i32* %ptr1
     24   store i32 0, i32* %select, align 4
     25   ret void
     26 }
     27 
     28 ; FIXME: This should be promotable but requires knowing that both will be promoted first.
     29 
     30 ; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
     31 ; CHECK: %alloca0 = alloca i32, i32 16, align 4
     32 ; CHECK: %alloca1 = alloca i32, i32 16, align 4
     33 ; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
     34 ; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
     35 ; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
        ; The two select operands are GEPs into two different allocas (%alloca0
        ; and %alloca1), each holding 16 x i32. The expectations above require
        ; both allocas, both GEPs and the select to appear unchanged, i.e. neither
        ; alloca is expected to be promoted here.
     36 define void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
     37   %alloca0 = alloca i32, i32 16, align 4
     38   %alloca1 = alloca i32, i32 16, align 4
     39   %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
     40   %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
     41   %select = select i1 undef, i32* %ptr0, i32* %ptr1
     42   store i32 0, i32* %select, align 4
     43   ret void
     44 }
     45 
     46 ; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
     47 ; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
     48 ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
     49 ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
     50 ; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
     51 ; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
     52 ; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
        ; Same shape as the two-derived-pointers case, but the GEP indices are
        ; the constants 1 and 3 instead of function arguments. The expectations
        ; above require the alloca to be replaced by an addrspace(3) global and
        ; the constant-index GEPs to be kept as separate instructions feeding
        ; the rewritten select and store.
     53 define void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
     54   %alloca = alloca [16 x i32], align 4
     55   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
     56   %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
     57   %select = select i1 undef, i32* %ptr0, i32* %ptr1
     58   store i32 0, i32* %select, align 4
     59   ret void
     60 }
     61 
     62 ; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
     63 ; CHECK: getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_select_input_select.alloca, i32 0, i32 %{{[0-9]+}}
     64 ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
     65 ; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
     66 ; CHECK: %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %c
     67 ; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
     68 ; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
     69 ; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
        ; Chained selects: %select0 picks between two GEPs into the alloca, and
        ; %select1 picks between %select0 and a third GEP into the same alloca.
        ; The expectations above require the alloca to be replaced by an
        ; addrspace(3) global and all three GEPs, both selects and the final
        ; store to be rewritten to addrspace(3) pointers.
     70 define void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
     71   %alloca = alloca [16 x i32], align 4
     72   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
     73   %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
     74   %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
     75   %select0 = select i1 undef, i32* %ptr0, i32* %ptr1
     76   %select1 = select i1 undef, i32* %select0, i32* %ptr2
     77   store i32 0, i32* %select1, align 4
     78   ret void
     79 }
     80 
     81 define void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
          ; A select (%select1) whose first operand is a phi. The phi merges a
          ; GEP into the alloca (%ptr0, from entry) with another select
          ; (%select0, from bb1) that itself has an undef pointer operand.
          ; This function has no FileCheck expectations in this file, so its
          ; transformed output is not verified.
          ; NOTE(review): presumably this only checks that the pass handles the
          ; input without crashing -- confirm, or add expectations.
     82 entry:
     83   %alloca = alloca [16 x i32], align 4
     84   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
     85   %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
     86   store i32 0, i32* %ptr0
     87   br i1 undef, label %bb1, label %bb2
     88 
     89 bb1:
     90   %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
          ; %select0 mixes an undef pointer with a pointer derived from the alloca.
     91   %select0 = select i1 undef, i32* undef, i32* %ptr2
     92   store i32 0, i32* %ptr1
     93   br label %bb2
     94 
     95 bb2:
     96   %phi.ptr = phi i32* [ %ptr0, %entry ], [ %select0, %bb1 ]
     97   %select1 = select i1 undef, i32* %phi.ptr, i32* %ptr1
     98   store i32 0, i32* %select1, align 4
     99   ret void
    100 }
    101 
    102 ; CHECK-LABEL: @select_null_rhs(
    103 ; CHECK-NOT: alloca
    104 ; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
        ; Selects between the alloca (true operand) and a null pointer (false
        ; operand), stores through the result, then reloads the alloca and
        ; writes the value to the addrspace(1) argument. The expectations above
        ; require that no alloca appears before the select and that the select
        ; is rewritten to addrspace(3) pointers with null kept on the right.
    105 define void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
    106 bb:
    107   %tmp = alloca double, align 8
    108   store double 0.000000e+00, double* %tmp, align 8
    109   %tmp2 = icmp eq i32 %arg1, 0
    110   %tmp3 = select i1 %tmp2, double* %tmp, double* null
    111   store double 1.000000e+00, double* %tmp3, align 8
    112   %tmp4 = load double, double* %tmp, align 8
    113   store double %tmp4, double addrspace(1)* %arg
    114   ret void
    115 }
    116 
    117 ; CHECK-LABEL: @select_null_lhs(
    118 ; CHECK-NOT: alloca
    119 ; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
        ; Mirror of the null-on-the-right case: the null pointer is the true
        ; operand of the select and the alloca is the false operand. The
        ; expectations above require that no alloca appears before the select
        ; and that the select is rewritten to addrspace(3) pointers with null
        ; kept on the left.
    120 define void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
    121 bb:
    122   %tmp = alloca double, align 8
    123   store double 0.000000e+00, double* %tmp, align 8
    124   %tmp2 = icmp eq i32 %arg1, 0
    125   %tmp3 = select i1 %tmp2, double* null, double* %tmp
    126   store double 1.000000e+00, double* %tmp3, align 8
    127   %tmp4 = load double, double* %tmp, align 8
    128   store double %tmp4, double addrspace(1)* %arg
    129   ret void
    130 }
    131 
    132 attributes #0 = { norecurse nounwind "amdgpu-max-waves-per-eu"="1" }
        ; Among the functions visible in this file, #0 (above) is attached to the
        ; lds_* functions and sets "amdgpu-max-waves-per-eu"="1"; #1 (below) is
        ; attached to the select_null_* functions and omits that setting.
    133 attributes #1 = { norecurse nounwind }