; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s

; Regression test for the AMDGPU library-call simplification pass.  The same IR
; is checked under three configurations:
;   GCN-POSTLINK: -amdgpu-simplify-libcall alone (post-link behavior).
;   GCN-PRELINK:  additionally -amdgpu-prelink (sin/cos fuse to sincos, pow and
;                 rootn are expanded, pipe builtins are specialized, ...).
;   GCN-NATIVE:   additionally -amdgpu-use-native (calls are rewritten to the
;                 native_* library variants).
; Callee names are Itanium-mangled OpenCL builtins (e.g. @_Z3sinf == sin(float)).

; sin(x) and cos(x) of the same operand: fused into a single sincos call at
; prelink, or rewritten to native_sin/native_cos under -amdgpu-use-native.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
; GCN-POSTLINK: tail call fast float @_Z3sinf(
; GCN-POSTLINK: tail call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPf(
; GCN-NATIVE: tail call fast float @_Z10native_sinf(
; GCN-NATIVE: tail call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3sinf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  %call2 = tail call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  store float %call2, float addrspace(1)* %arrayidx3, align 4
  ret void
}

declare float @_Z3sinf(float)

declare float @_Z3cosf(float)

; Same sincos fusion for each supported vector width (v2..v16 below).
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f(
; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
  %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
  %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
  store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
  ret void
}

declare <2 x float> @_Z3sinDv2_f(<2 x float>)

declare <2 x float> @_Z3cosDv2_f(<2 x float>)

; v3 uses the usual <3 x float>-as-<4 x float> load/store shuffle idiom.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f(
; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
entry:
  %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
  %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
  %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
  store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
  ret void
}

declare <3 x float> @_Z3sinDv3_f(<3 x float>)

declare <3 x float> @_Z3cosDv3_f(<3 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f(
; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
  %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
  %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
  store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
  ret void
}

declare <4 x float> @_Z3sinDv4_f(<4 x float>)

declare <4 x float> @_Z3cosDv4_f(<4 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f(
; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
  %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
  %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
  store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
  ret void
}

declare <8 x float> @_Z3sinDv8_f(<8 x float>)

declare <8 x float> @_Z3cosDv8_f(<8 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f(
; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
  %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
  %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
  store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
  ret void
}

declare <16 x float> @_Z3sinDv16_f(<16 x float>)

declare <16 x float> @_Z3cosDv16_f(<16 x float>)

; recip of a constant is constant-folded: 1/3 == 0x3FD5555560000000.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z12native_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z10half_recipf(float)

; divide by a constant becomes multiply by its reciprocal.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z13native_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z11half_divideff(float, float)

; pow with special exponents: pow(x, 0) -> 1.0.
; NOTE(review): the *_0i/_1i/_2i/_m1i variants below are byte-identical to the
; *_0f/_1f/_2f/_m1f ones - they also call the float overload @_Z3powff, not
; @_Z4pownfi with an i32 exponent as the names suggest. Presumably intentional
; (or left over from an earlier form of the test); confirm before relying on it.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3powff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; pow(x, 1) -> x.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; pow(x, 2) -> x*x.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; pow(x, -1) -> 1/x.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; pow(x, 0.5) -> sqrt(x), prelink only.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; pow(x, -0.5) -> rsqrt(x), prelink only.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; Small integral constant exponent (11): expanded into a multiply chain
; (addition-chain exponentiation) for pow, powr and pown alike.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4powrff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4pownfi(float, i32)

; General pow with a large constant exponent: prelink expands it to
; exp2(y * log2(|x|)) with the sign of x reattached via integer bit ops.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; powr (x assumed non-negative) needs no fabs/sign fixup: just exp2(y*log2(x)).
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; pown (integer exponent): expanded like pow, with the sign derived from the
; exponent's parity (shl by 31) ANDed with the sign of x.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; rootn special cases: rootn(x, 1) -> x.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rootnfi(float, i32)

; rootn(x, 2) -> sqrt(x), prelink only.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2)
; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; rootn(x, 3) -> cbrt(x), prelink only.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3)
; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; rootn(x, -1) -> 1/x.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
; GCN: fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; rootn(x, -2) -> rsqrt(x), prelink only.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; fma/mad identities: a zero multiplicand reduces the call to the addend %y.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3fmafff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3madfff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; fma with a unit multiplicand reduces to an fadd.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; fma with a zero addend reduces to an fmul.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
; GCN: %fmamul = fmul fast float %tmp1, %tmp
define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %tmp1 = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; -amdgpu-use-native: each transcendental below is rewritten to its native_*
; counterpart (checked only under GCN-NATIVE).
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp)
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3expf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3expf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4exp2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4exp2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5exp10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5exp10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp)
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3logf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3logf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp)
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4log2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4log2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp)
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5log10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5log10f(float)

; powr under -amdgpu-use-native combines the pow expansion with native log2/exp2.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; f64 sqrt must NOT be converted to native (native_* variants are f32-only):
; the original call is checked under the common GCN prefix.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
; GCN: tail call fast double @_Z4sqrtd(double %tmp)
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
  %tmp = load double, double addrspace(1)* %a, align 8
  %call = tail call fast double @_Z4sqrtd(double %tmp)
  store double %call, double addrspace(1)* %a, align 8
  ret void
}

declare float @_Z4sqrtf(float)
declare double @_Z4sqrtd(double)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rsqrtf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp)
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3tanf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3tanf(float)

; An explicit sincos call is split into native sin + cos (note: the CHECK lines
; here intentionally omit 'fast').
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp)
; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp)
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
  %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z6sincosfPf(float, float*)

%opencl.pipe_t = type opaque
%opencl.reserve_id_t = type opaque

; OpenCL pipe builtins: when the packet size/alignment arguments are constant,
; prelink specializes the generic __read_pipe_2/__read_pipe_4 calls to the
; size-suffixed variants (here _4 for 4-byte packets) and drops the size args.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
  %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
  ret void
}

declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)

declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)

declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)

declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)

; Same specialization for the write-pipe builtins.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
  %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
  ret void
}

declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr

declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr

declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr

declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr

%struct.S = type { [100 x i32] }

; One read per supported packet size (1..128 bytes); an unsupported size
; (the 400-byte %struct.S) keeps the generic __read_pipe_2 call.
; NOTE(review): the kernel body is truncated at this chunk boundary.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
  %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
  %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
  %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
  %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
  %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
  %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8
addrspace(1)* 763 %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8* 764 %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 765 %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)* 766 %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8* 767 %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 768 %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)* 769 %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8* 770 %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 771 %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)* 772 %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8* 773 %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 774 %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)* 775 %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8* 776 %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 777 %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)* 778 %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8* 779 %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 780 %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)* 781 %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8* 782 %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 783 ret void 784 } 785 786 ; CGN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind } 787 attributes #0 = { nounwind } 788