Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck --check-prefix=OPT %s
      2 
      3 ; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly.
      4 ; Strictly speaking, the promote-alloca pass shouldn't have to deal with this case as it is
      5 ; non-canonical, but the pass should handle it gracefully if it is encountered.
      6 ; The checks look for the (non-canonical) lines that previously caused issues in PromoteAlloca. Opt
      7 ; should now leave these unchanged.
      8 
      9 ; OPT-LABEL: @promote_1d_aggr(
     10 ; OPT: store [1 x float] %tmp3, [1 x float]* %f1
     11 
     12 %Block = type { [1 x float], i32 }
     13 %gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
     14 %struct = type { i32, i32 }
     15 
     16 @block = external addrspace(1) global %Block
     17 @pv = external addrspace(1) global %gl_PerVertex
     18 
        ; Test case: a one-element float array alloca (%f1) is written with a single
        ; whole-aggregate store and then read back one element at a time through a
        ; dynamically indexed GEP. The check lines above expect the aggregate store
        ; to still be present, with the same operands, in the pass output.
     19 define amdgpu_vs void @promote_1d_aggr() #0 {
     20   %i = alloca i32
     21   %f1 = alloca [1 x float]
          ; Copy the i32 field (index 1) of @block into the %i stack slot.
     22   %tmp = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
     23   %tmp1 = load i32, i32 addrspace(1)* %tmp
     24   store i32 %tmp1, i32* %i
          ; Load the [1 x float] field (index 0) of @block as one aggregate value and
          ; store it into %f1 as an aggregate; this store is the instruction the
          ; check line looks for.
     25   %tmp2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
     26   %tmp3 = load [1 x float], [1 x float] addrspace(1)* %tmp2
     27   store [1 x float] %tmp3, [1 x float]* %f1
          ; Read a single float back out of %f1, indexed by the value held in %i.
     28   %tmp4 = load i32, i32* %i
     29   %tmp5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %tmp4
     30   %tmp6 = load float, float* %tmp5
          ; %tmp7 is loaded without any prior store in this function; all four lanes
          ; of the loaded vector are then overwritten with %tmp6.
     31   %tmp7 = alloca <4 x float>
     32   %tmp8 = load <4 x float>, <4 x float>* %tmp7
     33   %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
     34   %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
     35   %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
     36   %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
          ; Write the splatted vector to the <4 x float> field (index 0) of @pv.
     37   %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
     38   store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
     39   ret void
     40 }
     41 
     42 
     43 ; OPT-LABEL: @promote_store_aggr(
     44 ; OPT: %tmp6 = load [2 x float], [2 x float]* %f1
     45 
     46 %Block2 = type { i32, [2 x float] }
     47 @block2 = external addrspace(1) global %Block2
     48 
        ; Test case: a two-element float array alloca (%f1) is filled with two scalar
        ; element stores and then read back with a single whole-aggregate load. The
        ; check lines above expect that aggregate load to still be present, with the
        ; same operands, in the pass output.
     49 define amdgpu_vs void @promote_store_aggr() #0 {
     50   %i = alloca i32
     51   %f1 = alloca [2 x float]
          ; Copy the i32 field (index 0) of @block2 into the %i stack slot.
     52   %tmp = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
     53   %tmp1 = load i32, i32 addrspace(1)* %tmp
     54   store i32 %tmp1, i32* %i
          ; Element 0 of %f1 receives the value of %i converted to float.
     55   %tmp2 = load i32, i32* %i
     56   %tmp3 = sitofp i32 %tmp2 to float
     57   %tmp4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0
     58   store float %tmp3, float* %tmp4
          ; Element 1 of %f1 receives the constant 2.0.
     59   %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1
     60   store float 2.000000e+00, float* %tmp5
          ; Whole-aggregate load of %f1; this load is the instruction the check line
          ; looks for. The loaded value is written to the [2 x float] field (index 1)
          ; of @block2.
     61   %tmp6 = load [2 x float], [2 x float]* %f1
     62   %tmp7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
     63   store [2 x float] %tmp6, [2 x float] addrspace(1)* %tmp7
          ; Store a constant all-1.0 vector to the <4 x float> field (index 0) of @pv.
     64   %tmp8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
     65   store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* %tmp8
     66   ret void
     67 }
     68 
     69 ; OPT-LABEL: @promote_load_from_store_aggr(
     70 ; OPT: store [2 x float] %tmp3, [2 x float]* %f1
     71 
     72 %Block3 = type { [2 x float], i32 }
     73 @block3 = external addrspace(1) global %Block3
     74 
        ; Test case: a two-element float array alloca (%f1) is written with a single
        ; whole-aggregate store and then read back one element at a time through a
        ; dynamically indexed GEP. The check lines above expect the aggregate store
        ; to still be present, with the same operands, in the pass output.
     75 define amdgpu_vs void @promote_load_from_store_aggr() #0 {
     76   %i = alloca i32
     77   %f1 = alloca [2 x float]
          ; Copy the i32 field (index 1) of @block3 into the %i stack slot.
     78   %tmp = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
     79   %tmp1 = load i32, i32 addrspace(1)* %tmp
     80   store i32 %tmp1, i32* %i
          ; Load the [2 x float] field (index 0) of @block3 as one aggregate value and
          ; store it into %f1 as an aggregate; this store is the instruction the
          ; check line looks for.
     81   %tmp2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
     82   %tmp3 = load [2 x float], [2 x float] addrspace(1)* %tmp2
     83   store [2 x float] %tmp3, [2 x float]* %f1
          ; Read a single float back out of %f1, indexed by the value held in %i.
     84   %tmp4 = load i32, i32* %i
     85   %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %tmp4
     86   %tmp6 = load float, float* %tmp5
          ; %tmp7 is loaded without any prior store in this function; all four lanes
          ; of the loaded vector are then overwritten with %tmp6.
     87   %tmp7 = alloca <4 x float>
     88   %tmp8 = load <4 x float>, <4 x float>* %tmp7
     89   %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
     90   %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
     91   %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
     92   %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
          ; Write the splatted vector to the <4 x float> field (index 0) of @pv.
     93   %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
     94   store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
     95   ret void
     96 }
     97 
     98 ; OPT-LABEL: @promote_double_aggr(
     99 ; OPT: store [2 x double] %tmp5, [2 x double]* %s
    100 
    101 @tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
    102 @frag_color = external addrspace(1) global <4 x float>
    103 
        ; Test case: a two-element double array alloca (%s) is initialised with a
        ; single whole-aggregate store of a value built by insertvalue, and is then
        ; accessed only through scalar element loads and stores. The check lines
        ; above expect the aggregate store to still be present, with the same
        ; operands, in the pass output.
    104 define amdgpu_ps void @promote_double_aggr() #0 {
    105   %s = alloca [2 x double]
          ; Load elements 0 and 1 of the [4 x double] field (index 0) of @tmp_g.
    106   %tmp = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
    107   %tmp1 = load double, double addrspace(1)* %tmp
    108   %tmp2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
    109   %tmp3 = load double, double addrspace(1)* %tmp2
          ; Assemble the two doubles into a [2 x double] value and store it to %s as
          ; an aggregate; this store is the instruction the check line looks for.
    110   %tmp4 = insertvalue [2 x double] undef, double %tmp1, 0
    111   %tmp5 = insertvalue [2 x double] %tmp4, double %tmp3, 1
    112   store [2 x double] %tmp5, [2 x double]* %s
          ; Both %tmp7 and %tmp9 are loaded from element 1 of %s; their sum is
          ; written to element 0.
    113   %tmp6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
    114   %tmp7 = load double, double* %tmp6
    115   %tmp8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
    116   %tmp9 = load double, double* %tmp8
    117   %tmp10 = fadd double %tmp7, %tmp9
    118   %tmp11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
    119   store double %tmp10, double* %tmp11
          ; Re-read element 0 and element 1 of %s, add them, and truncate the sum to
          ; float.
    120   %tmp12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
    121   %tmp13 = load double, double* %tmp12
    122   %tmp14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
    123   %tmp15 = load double, double* %tmp14
    124   %tmp16 = fadd double %tmp13, %tmp15
    125   %tmp17 = fptrunc double %tmp16 to float
          ; Splat the float into all four lanes and store the vector to @frag_color.
    126   %tmp18 = insertelement <4 x float> undef, float %tmp17, i32 0
    127   %tmp19 = insertelement <4 x float> %tmp18, float %tmp17, i32 1
    128   %tmp20 = insertelement <4 x float> %tmp19, float %tmp17, i32 2
    129   %tmp21 = insertelement <4 x float> %tmp20, float %tmp17, i32 3
    130   store <4 x float> %tmp21, <4 x float> addrspace(1)* @frag_color
    131   ret void
    132 }
    133 
    134 ; Don't crash on a type that isn't a valid vector element.
    135 ; OPT-LABEL: @alloca_struct(
        ; Only the function label is checked; no check is made on the body. The
        ; function allocates an array of two %struct values and never uses it.
    136 define amdgpu_kernel void @alloca_struct() #0 {
    137 entry:
          ; The array element type is the named struct type %struct, not a scalar.
    138   %alloca = alloca [2 x %struct], align 4
    139   ret void
    140 }
    141