; LLVM regression test for the AMDGPU library-call simplification pass
; (llvm/test/CodeGen/AMDGPU — OpenCL builtin-library folding). A code-browser
; navigation header was removed from this line.
      1 ; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
      2 ; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink  <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
      3 ; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
      4 
      5 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
      6 ; GCN-POSTLINK: tail call fast float @_Z3sinf(
      7 ; GCN-POSTLINK: tail call fast float @_Z3cosf(
      8 ; GCN-PRELINK: call fast float @_Z6sincosfPf(
      9 ; GCN-NATIVE: tail call fast float @_Z10native_sinf(
     10 ; GCN-NATIVE: tail call fast float @_Z10native_cosf(
; sin(x) and cos(x) of the same operand: kept as separate calls post-link,
; fused into one sincos call at prelink, and turned into native_sin/native_cos
; when -amdgpu-use-native is given (per the three RUN lines above).
     11 define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
     12 entry:
     13   %tmp = load float, float addrspace(1)* %a, align 4
     14   %call = tail call fast float @_Z3sinf(float %tmp)
     15   store float %call, float addrspace(1)* %a, align 4
     16   %call2 = tail call fast float @_Z3cosf(float %tmp)
     17   %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
     18   store float %call2, float addrspace(1)* %arrayidx3, align 4
     19   ret void
     20 }
     21 
; Itanium-mangled OpenCL builtins: _Z3sinf = sin(float), _Z3cosf = cos(float).
     22 declare float @_Z3sinf(float)
     23 
     24 declare float @_Z3cosf(float)
     25 
     26 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
     27 ; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f(
     28 ; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f(
     29 ; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
     30 ; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f(
     31 ; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f(
; Same sin+cos -> sincos / native folding as the scalar case, for float2
; (mangled Dv2_f).
     32 define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
     33 entry:
     34   %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
     35   %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
     36   store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
     37   %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
     38   %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
     39   store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
     40   ret void
     41 }
     42 
     43 declare <2 x float> @_Z3sinDv2_f(<2 x float>)
     44 
     45 declare <2 x float> @_Z3cosDv2_f(<2 x float>)
     46 
     47 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
     48 ; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f(
     49 ; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f(
     50 ; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
     51 ; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f(
     52 ; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f(
; float3 variant. The <3 x float> is loaded/stored through a bitcast to
; <4 x float> with shufflevector extract/insert (float3 occupying a 16-byte
; slot, as the align 16 accesses show); the sin/cos folding itself matches
; the other vector widths.
     53 define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
     54 entry:
     55   %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
     56   %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
     57   %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
     58   %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
     59   %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
     60   store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
     61   %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
     62   %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
     63   %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
     64   %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
     65   store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
     66   ret void
     67 }
     68 
     69 declare <3 x float> @_Z3sinDv3_f(<3 x float>)
     70 
     71 declare <3 x float> @_Z3cosDv3_f(<3 x float>)
     72 
     73 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
     74 ; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f(
     75 ; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f(
     76 ; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
     77 ; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f(
     78 ; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f(
; float4 variant of the sin+cos -> sincos / native folding.
     79 define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
     80 entry:
     81   %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
     82   %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
     83   store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
     84   %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
     85   %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
     86   store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
     87   ret void
     88 }
     89 
     90 declare <4 x float> @_Z3sinDv4_f(<4 x float>)
     91 
     92 declare <4 x float> @_Z3cosDv4_f(<4 x float>)
     93 
     94 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
     95 ; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f(
     96 ; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f(
     97 ; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
     98 ; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f(
     99 ; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f(
; float8 variant of the sin+cos -> sincos / native folding.
    100 define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
    101 entry:
    102   %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
    103   %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
    104   store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
    105   %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
    106   %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
    107   store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
    108   ret void
    109 }
    110 
    111 declare <8 x float> @_Z3sinDv8_f(<8 x float>)
    112 
    113 declare <8 x float> @_Z3cosDv8_f(<8 x float>)
    114 
    115 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
    116 ; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f(
    117 ; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f(
    118 ; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
    119 ; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f(
    120 ; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f(
; float16 variant of the sin+cos -> sincos / native folding (widest OpenCL
; vector type exercised by this test).
    121 define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
    122 entry:
    123   %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
    124   %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
    125   store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
    126   %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
    127   %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
    128   store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
    129   ret void
    130 }
    131 
    132 declare <16 x float> @_Z3sinDv16_f(<16 x float>)
    133 
    134 declare <16 x float> @_Z3cosDv16_f(<16 x float>)
    135 
    136 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
    137 ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
; native_recip(3.0) is constant-folded: 0x3FD5555560000000 is 1/3 rounded to
; float, stored directly (the call disappears under every RUN configuration).
    138 define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
    139 entry:
    140   %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
    141   store float %call, float addrspace(1)* %a, align 4
    142   ret void
    143 }
    144 
    145 declare float @_Z12native_recipf(float)
    146 
    147 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
    148 ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
; Same constant fold for the half_recip builtin.
    149 define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
    150 entry:
    151   %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
    152   store float %call, float addrspace(1)* %a, align 4
    153   ret void
    154 }
    155 
    156 declare float @_Z10half_recipf(float)
    157 
    158 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
    159 ; GCN: fmul fast float %tmp, 0x3FD5555560000000
; Division by the constant 3.0 becomes multiplication by the reciprocal
; (0x3FD5555560000000 = 1/3 as float).
    160 define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
    161 entry:
    162   %tmp = load float, float addrspace(1)* %a, align 4
    163   %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
    164   store float %call, float addrspace(1)* %a, align 4
    165   ret void
    166 }
    167 
    168 declare float @_Z13native_divideff(float, float)
    169 
    170 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
    171 ; GCN: fmul fast float %tmp, 0x3FD5555560000000
; Same reciprocal-multiply fold for the half_divide builtin.
    172 define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
    173 entry:
    174   %tmp = load float, float addrspace(1)* %a, align 4
    175   %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
    176   store float %call, float addrspace(1)* %a, align 4
    177   ret void
    178 }
    179 
    180 declare float @_Z11half_divideff(float, float)
    181 
; pow special-exponent folds. The *_0i/*_1i names mirror the *_0f/*_1f tests;
; here both spell the exponent as a float constant and expect the same fold.
    182 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
    183 ; GCN: store float 1.000000e+00, float addrspace(1)* %a
; pow(x, 0.0) -> 1.0 (the call is removed entirely).
    184 define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
    185 entry:
    186   %tmp = load float, float addrspace(1)* %a, align 4
    187   %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
    188   store float %call, float addrspace(1)* %a, align 4
    189   ret void
    190 }
    191 
    192 declare float @_Z3powff(float, float)
    193 
    194 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
    195 ; GCN: store float 1.000000e+00, float addrspace(1)* %a
    196 define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
    197 entry:
    198   %tmp = load float, float addrspace(1)* %a, align 4
    199   %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
    200   store float %call, float addrspace(1)* %a, align 4
    201   ret void
    202 }
    203 
    204 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
    205 ; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
    206 ; GCN: store float %tmp, float addrspace(1)* %a, align 4
; pow(x, 1.0) -> x: the loaded value is stored back unchanged.
    207 define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
    208 entry:
    209   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    210   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    211   %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
    212   store float %call, float addrspace(1)* %a, align 4
    213   ret void
    214 }
    215 
    216 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
    217 ; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
    218 ; GCN: store float %tmp, float addrspace(1)* %a, align 4
    219 define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
    220 entry:
    221   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    222   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    223   %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
    224   store float %call, float addrspace(1)* %a, align 4
    225   ret void
    226 }
    227 
    228 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
    229 ; GCN: %tmp = load float, float addrspace(1)* %a, align 4
    230 ; GCN: %__pow2 = fmul fast float %tmp, %tmp
; pow(x, 2.0) -> x*x.
    231 define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
    232 entry:
    233   %tmp = load float, float addrspace(1)* %a, align 4
    234   %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
    235   store float %call, float addrspace(1)* %a, align 4
    236   ret void
    237 }
    238 
    239 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
    240 ; GCN: %tmp = load float, float addrspace(1)* %a, align 4
    241 ; GCN: %__pow2 = fmul fast float %tmp, %tmp
    242 define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
    243 entry:
    244   %tmp = load float, float addrspace(1)* %a, align 4
    245   %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
    246   store float %call, float addrspace(1)* %a, align 4
    247   ret void
    248 }
    249 
    250 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
    251 ; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
    252 ; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
; pow(x, -1.0) -> 1.0/x.
    253 define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
    254 entry:
    255   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    256   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    257   %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
    258   store float %call, float addrspace(1)* %a, align 4
    259   ret void
    260 }
    261 
    262 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
    263 ; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
    264 ; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
    265 define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
    266 entry:
    267   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    268   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    269   %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
    270   store float %call, float addrspace(1)* %a, align 4
    271   ret void
    272 }
    273 
    274 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
    275 ; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
    276 ; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
; pow(x, 0.5) -> sqrt(x), but only at prelink; post-link the call is kept.
    277 define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
    278 entry:
    279   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    280   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    281   %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
    282   store float %call, float addrspace(1)* %a, align 4
    283   ret void
    284 }
    285 
    286 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
    287 ; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
    288 ; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
; pow(x, -0.5) -> rsqrt(x) at prelink.
    289 define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
    290 entry:
    291   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    292   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    293   %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
    294   store float %call, float addrspace(1)* %a, align 4
    295   ret void
    296 }
    297 
    298 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
    299 ; GCN: %__powx2 = fmul fast float %tmp, %tmp
    300 ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
    301 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp
    302 ; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
    303 ; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
; pow(x, 11) with a small constant exponent is expanded to a multiply chain
; (x^11 = (x^4)^2 * x^2 * x, five fmuls as matched above).
    304 define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
    305 entry:
    306   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    307   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    308   %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01)
    309   store float %call, float addrspace(1)* %a, align 4
    310   ret void
    311 }
    312 
    313 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
    314 ; GCN: %__powx2 = fmul fast float %tmp, %tmp
    315 ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
    316 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp
    317 ; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
    318 ; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
; powr with the same constant exponent gets the identical multiply-chain
; expansion.
    319 define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
    320 entry:
    321   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    322   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    323   %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
    324   store float %call, float addrspace(1)* %a, align 4
    325   ret void
    326 }
    327 
    328 declare float @_Z4powrff(float, float)
    329 
    330 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
    331 ; GCN: %__powx2 = fmul fast float %tmp, %tmp
    332 ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
    333 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp
    334 ; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
    335 ; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
; pown with an i32 constant exponent also expands to the multiply chain.
    336 define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
    337 entry:
    338   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    339   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    340   %call = tail call fast float @_Z4pownfi(float %tmp, i32 11)
    341   store float %call, float addrspace(1)* %a, align 4
    342   ret void
    343 }
    344 
    345 declare float @_Z4pownfi(float, i32)
    346 
    347 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
    348 ; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
; Large non-special exponent: at prelink pow(x, y) is expanded to
; exp2(y * log2(|x|)) with x's sign bit reapplied via integer and/or on the
; bit pattern, as the CHECK lines below spell out.
    349 ; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
    350 ; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
    351 ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
    352 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
    353 ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
    354 ; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
    355 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
    356 ; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
    357 ; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
    358 ; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
    359 define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
    360 entry:
    361   %tmp = load float, float addrspace(1)* %a, align 4
    362   %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
    363   store float %call, float addrspace(1)* %a, align 4
    364   ret void
    365 }
    366 
    367 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
    368 ; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1)
; powr (x assumed non-negative) skips the fabs/sign-copy steps: plain
; exp2(y * log2(x)); with -amdgpu-use-native the log2/exp2 become native.
    369 ; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
    370 ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
    371 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
    372 ; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
    373 ; GCN-NATIVE:  %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
    374 ; GCN-NATIVE:  %__ylogx = fmul fast float %__log2, %tmp1
    375 ; GCN-NATIVE:  %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
    376 ; GCN-NATIVE:  store float %__exp2, float addrspace(1)* %a, align 4
    377 define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
    378 entry:
    379   %tmp = load float, float addrspace(1)* %a, align 4
    380   %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    381   %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
    382   %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
    383   store float %call, float addrspace(1)* %a, align 4
    384   ret void
    385 }
    386 
    387 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
    388 ; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
; pown(x, n): same exp2/log2 expansion, with the i32 exponent converted back
; to float for the multiply and the result sign taken from x only when n is
; odd (%__yeven = n << 31 isolates the parity bit).
    389 ; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
    390 ; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
    391 ; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
    392 ; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
    393 ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
    394 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
    395 ; GCN-PRELINK: %__yeven = shl i32 %conv, 31
    396 ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
    397 ; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
    398 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
    399 ; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
    400 ; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
    401 ; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
    402 define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
    403 entry:
    404   %tmp = load float, float addrspace(1)* %a, align 4
    405   %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    406   %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
    407   %conv = fptosi float %tmp1 to i32
    408   %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
    409   store float %call, float addrspace(1)* %a, align 4
    410   ret void
    411 }
    412 
; rootn(x, n) special cases: n=1 -> x, n=2 -> sqrt, n=3 -> cbrt, n=-1 -> 1/x,
; n=-2 -> rsqrt. The sqrt/cbrt/rsqrt rewrites fire only at prelink.
    413 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
    414 ; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
    415 ; GCN: store float %tmp, float addrspace(1)* %a, align 4
    416 define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
    417 entry:
    418   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    419   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    420   %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1)
    421   store float %call, float addrspace(1)* %a, align 4
    422   ret void
    423 }
    424 
    425 declare float @_Z5rootnfi(float, i32)
    426 
    427 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
    428 ; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2)
    429 ; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
    430 define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
    431 entry:
    432   %tmp = load float, float addrspace(1)* %a, align 4
    433   %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2)
    434   store float %call, float addrspace(1)* %a, align 4
    435   ret void
    436 }
    437 
    438 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
    439 ; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3)
    440 ; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
    441 define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
    442 entry:
    443   %tmp = load float, float addrspace(1)* %a, align 4
    444   %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3)
    445   store float %call, float addrspace(1)* %a, align 4
    446   ret void
    447 }
    448 
    449 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
    450 ; GCN: fdiv fast float 1.000000e+00, %tmp
    451 define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
    452 entry:
    453   %tmp = load float, float addrspace(1)* %a, align 4
    454   %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1)
    455   store float %call, float addrspace(1)* %a, align 4
    456   ret void
    457 }
    458 
    459 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
    460 ; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
    461 ; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
    462 define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
    463 entry:
    464   %tmp = load float, float addrspace(1)* %a, align 4
    465   %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
    466   store float %call, float addrspace(1)* %a, align 4
    467   ret void
    468 }
    469 
; fma/mad identity folds: a zero multiplicand reduces fma(a, b, y) / mad to
; just %y (note "fast" allows ignoring signed zeros/NaN propagation).
    470 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
    471 ; GCN: store float %y, float addrspace(1)* %a
    472 define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
    473 entry:
    474   %tmp = load float, float addrspace(1)* %a, align 4
    475   %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
    476   store float %call, float addrspace(1)* %a, align 4
    477   ret void
    478 }
    479 
    480 declare float @_Z3fmafff(float, float, float)
    481 
    482 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
    483 ; GCN: store float %y, float addrspace(1)* %a
    484 define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
    485 entry:
    486   %tmp = load float, float addrspace(1)* %a, align 4
    487   %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
    488   store float %call, float addrspace(1)* %a, align 4
    489   ret void
    490 }
    491 
    492 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
    493 ; GCN: store float %y, float addrspace(1)* %a
    494 define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
    495 entry:
    496   %tmp = load float, float addrspace(1)* %a, align 4
    497   %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
    498   store float %call, float addrspace(1)* %a, align 4
    499   ret void
    500 }
    501 
    502 declare float @_Z3madfff(float, float, float)
    503 
    504 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
    505 ; GCN: store float %y, float addrspace(1)* %a
    506 define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
    507 entry:
    508   %tmp = load float, float addrspace(1)* %a, align 4
    509   %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
    510   store float %call, float addrspace(1)* %a, align 4
    511   ret void
    512 }
    513 
; More fma identity folds: a unit multiplicand reduces fma to an fadd, and a
; zero addend reduces it to an fmul.
    514 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
    515 ; GCN: %fmaadd = fadd fast float %tmp, %y
    516 define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
    517 entry:
    518   %tmp = load float, float addrspace(1)* %a, align 4
    519   %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
    520   store float %call, float addrspace(1)* %a, align 4
    521   ret void
    522 }
    523 
    524 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
    525 ; GCN: %fmaadd = fadd fast float %tmp, %y
    526 define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
    527 entry:
    528   %tmp = load float, float addrspace(1)* %a, align 4
    529   %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
    530   store float %call, float addrspace(1)* %a, align 4
    531   ret void
    532 }
    533 
    534 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
    535 ; GCN: %fmamul = fmul fast float %tmp1, %tmp
    536 define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
    537 entry:
    538   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
    539   %tmp = load float, float addrspace(1)* %arrayidx, align 4
    540   %tmp1 = load float, float addrspace(1)* %a, align 4
    541   %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
    542   store float %call, float addrspace(1)* %a, align 4
    543   ret void
    544 }
    545 
; -amdgpu-use-native substitution tests: each exp/log family builtin is
; replaced by its native_* counterpart (checked only under GCN-NATIVE).
    546 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
    547 ; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp)
    548 define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
    549 entry:
    550   %tmp = load float, float addrspace(1)* %a, align 4
    551   %call = tail call fast float @_Z3expf(float %tmp)
    552   store float %call, float addrspace(1)* %a, align 4
    553   ret void
    554 }
    555 
    556 declare float @_Z3expf(float)
    557 
    558 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
    559 ; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp)
    560 define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
    561 entry:
    562   %tmp = load float, float addrspace(1)* %a, align 4
    563   %call = tail call fast float @_Z4exp2f(float %tmp)
    564   store float %call, float addrspace(1)* %a, align 4
    565   ret void
    566 }
    567 
    568 declare float @_Z4exp2f(float)
    569 
    570 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
    571 ; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp)
    572 define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
    573 entry:
    574   %tmp = load float, float addrspace(1)* %a, align 4
    575   %call = tail call fast float @_Z5exp10f(float %tmp)
    576   store float %call, float addrspace(1)* %a, align 4
    577   ret void
    578 }
    579 
    580 declare float @_Z5exp10f(float)
    581 
    582 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
    583 ; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp)
    584 define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
    585 entry:
    586   %tmp = load float, float addrspace(1)* %a, align 4
    587   %call = tail call fast float @_Z3logf(float %tmp)
    588   store float %call, float addrspace(1)* %a, align 4
    589   ret void
    590 }
    591 
    592 declare float @_Z3logf(float)
    593 
    594 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
    595 ; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp)
    596 define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
    597 entry:
    598   %tmp = load float, float addrspace(1)* %a, align 4
    599   %call = tail call fast float @_Z4log2f(float %tmp)
    600   store float %call, float addrspace(1)* %a, align 4
    601   ret void
    602 }
    603 
    604 declare float @_Z4log2f(float)
    605 
    606 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
    607 ; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp)
    608 define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
    609 entry:
    610   %tmp = load float, float addrspace(1)* %a, align 4
    611   %call = tail call fast float @_Z5log10f(float %tmp)
    612   store float %call, float addrspace(1)* %a, align 4
    613   ret void
    614 }
    615 
    616 declare float @_Z5log10f(float)
    617 
    618 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
    619 ; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
    620 ; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
    621 ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
    622 ; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
    623 ; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
; powr(x, y) case: the directives above expect the call to be expanded into
; native_exp2(y * native_log2(x)) when native-call substitution is enabled.
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4            ; x = a[0]
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4   ; y = a[1]
  ; _Z4powrff is the Itanium mangling of powr(float, float).
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4            ; a[0] = powr(x, y)
  ret void
}
    633 
    634 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
    635 ; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
; Scalar f32 sqrt case: x = a[0]; a[0] = sqrt(x), called with fast-math.
; The directive above expects the native sqrt variant to be substituted.
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  ; _Z4sqrtf is the Itanium mangling of sqrt(float).
  %call = tail call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
    643 
    644 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
    645 ; GCN: tail call fast double @_Z4sqrtd(double %tmp)
; Negative test: per the directive above, the f64 sqrt call must be left
; untouched — it must still call @_Z4sqrtd even under fast-math, because no
; native substitution is performed for the double-precision variant here.
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
  %tmp = load double, double addrspace(1)* %a, align 8
  ; _Z4sqrtd is the Itanium mangling of sqrt(double).
  %call = tail call fast double @_Z4sqrtd(double %tmp)
  store double %call, double addrspace(1)* %a, align 8
  ret void
}
    653 
    654 declare float @_Z4sqrtf(float)
    655 declare double @_Z4sqrtd(double)
    656 
    657 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
    658 ; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
; Scalar rsqrt case: x = a[0]; a[0] = rsqrt(x), called with fast-math.
; The directive above expects the native rsqrt variant to be substituted.
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  ; _Z5rsqrtf is the Itanium mangling of rsqrt(float).
  %call = tail call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
    666 
    667 declare float @_Z5rsqrtf(float)
    668 
    669 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
    670 ; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp)
; Scalar tan case: x = a[0]; a[0] = tan(x), called with fast-math.
; The directive above expects the native tan variant to be substituted.
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  ; _Z3tanf is the Itanium mangling of tan(float).
  %call = tail call fast float @_Z3tanf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
    678 
    679 declare float @_Z3tanf(float)
    680 
    681 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
    682 ; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp)
    683 ; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp)
; sincos case: a[0] = sin(x) is returned by value, cos(x) is written through
; the generic pointer to a[1].  The directives above expect the combined call
; to be split into separate native sin and native cos calls.
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4            ; x = a[0]
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  ; sincos takes a generic (flat) float* for the cosine result, so the
  ; global-address-space pointer to a[1] must be addrspacecast first.
  %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
  ; _Z6sincosfPf is the Itanium mangling of sincos(float, float*).
  %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
  store float %call, float addrspace(1)* %a, align 4            ; a[0] = sin(x)
  ret void
}
    693 
    694 declare float @_Z6sincosfPf(float, float*)
    695 
    696 %opencl.pipe_t = type opaque
    697 %opencl.reserve_id_t = type opaque
    698 
    699 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
    700 ; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
    701 ; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
; OpenCL pipe read builtins with packet size 4 and alignment 4 (the trailing
; i32 pairs).  The prelink directives above expect the generic byte-pointer
; forms to be specialized into the size-suffixed _4 variants that take a typed
; i32* and drop the now-implied size/alignment arguments.
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  ; The generic builtins take an i8* in the flat address space, so the global
  ; i32 destination is bitcast and then addrspacecast.
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  ; Non-reserved read: read_pipe(p, dst) with packet size 4, align 4.
  %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  ; Reserve 2 packets, read packet index 2 from the reservation, then commit.
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
  %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
  ret void
}
    712 
    713 declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)
    714 
    715 declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)
    716 
    717 declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)
    718 
    719 declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)
    720 
    721 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
    722 ; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
    723 ; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
; Mirror of test_read_pipe for the write direction: pipe write builtins with
; packet size 4 and alignment 4.  The prelink directives above expect the
; generic byte-pointer forms to be specialized into the size-suffixed _4
; variants taking a typed i32* with the size/alignment arguments dropped.
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  ; Source data pointer converted to a flat i8* for the generic builtins.
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  ; Non-reserved write: write_pipe(p, src) with packet size 4, align 4.
  %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  ; Reserve 2 packets, write packet index 2 of the reservation, then commit.
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
  %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
  ret void
}
    734 
    735 declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr
    736 
    737 declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr
    738 
    739 declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr
    740 
    741 declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr
    742 
    743 %struct.S = type { [100 x i32] }
    744 
    745 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
    746 ; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
    747 ; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
    748 ; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
    749 ; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
    750 ; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
    751 ; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
    752 ; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
    753 ; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
    754 ; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
; Exercises every power-of-two packet size from 1 to 128 bytes plus one
; oversized packet (the 400-byte %struct.S).  Per the prelink directives
; above, each power-of-two size is specialized to the matching size-suffixed
; builtin (_1, _2, _4, _8, _16, _32, _64, _128) with a correspondingly typed
; destination pointer, while the 400-byte packet has no specialized variant
; and must keep calling the generic @__read_pipe_2 with explicit size/align.
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
  ; Each destination is bitcast to i8 and addrspacecast to flat before the
  ; generic builtin call; the trailing (i32 size, i32 align) pair states the
  ; packet size the pass keys its specialization on.
  %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
  %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
  %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
  %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
  %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
  %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
  %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
  %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
  %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
  %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
  %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
  %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
  %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
  %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
  %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
  %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
  %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
  %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
  %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
  %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
  %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
  %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
  %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
  ; 400-byte packet: larger than any specialized variant, so it must remain a
  ; call to the generic builtin.
  %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
  %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
  %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
  ret void
}
    785 
; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
    787 attributes #0 = { nounwind }
    788