; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

; SLP vectorizer tests for scalar intrinsic calls. Every function below spells
; out the same computation once per adjacent array element ("lane"). The
; FileCheck directives placed in front of each function state whether the
; per-lane intrinsic calls are expected to be merged into one vector call.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

declare double @llvm.fabs.f64(double) nounwind readnone

; Two lanes of c[i] = fabs(a[i] * b[i]) on doubles.
; Expected: two <2 x double> loads, one llvm.fabs.v2f64 call, one vector store.
; CHECK-LABEL: @vec_fabs_f64(
; CHECK: load <2 x double>
; CHECK: load <2 x double>
; CHECK: call <2 x double> @llvm.fabs.v2f64
; CHECK: store <2 x double>
; CHECK: ret
define void @vec_fabs_f64(double* %a, double* %b, double* %c) {
entry:
  ; lane 0
  %lhs0 = load double, double* %a, align 8
  %rhs0 = load double, double* %b, align 8
  %prod0 = fmul double %lhs0, %rhs0
  %abs0 = tail call double @llvm.fabs.f64(double %prod0) nounwind readnone
  ; lane 1
  %a.elt1 = getelementptr inbounds double, double* %a, i64 1
  %lhs1 = load double, double* %a.elt1, align 8
  %b.elt1 = getelementptr inbounds double, double* %b, i64 1
  %rhs1 = load double, double* %b.elt1, align 8
  %prod1 = fmul double %lhs1, %rhs1
  %abs1 = tail call double @llvm.fabs.f64(double %prod1) nounwind readnone
  ; both results are written back only after both lanes are computed
  store double %abs0, double* %c, align 8
  %c.elt1 = getelementptr inbounds double, double* %c, i64 1
  store double %abs1, double* %c.elt1, align 8
  ret void
}

declare float @llvm.copysign.f32(float, float) nounwind readnone

; Four lanes of c[i] = copysign(a[i], b[i]) on floats. Each lane stores its
; result immediately, so the stores are interleaved with the loads of later
; lanes; %c is declared noalias.
; Expected: two <4 x float> loads, one llvm.copysign.v4f32 call, one vector store.
; CHECK-LABEL: @vec_copysign_f32(
; CHECK: load <4 x float>
; CHECK: load <4 x float>
; CHECK: call <4 x float> @llvm.copysign.v4f32
; CHECK: store <4 x float>
; CHECK: ret
define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) {
entry:
  ; lane 0
  %mag0 = load float, float* %a, align 4
  %sgn0 = load float, float* %b, align 4
  %res0 = tail call float @llvm.copysign.f32(float %mag0, float %sgn0) nounwind readnone
  store float %res0, float* %c, align 4

  ; lane 1
  %a.elt1 = getelementptr inbounds float, float* %a, i64 1
  %mag1 = load float, float* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds float, float* %b, i64 1
  %sgn1 = load float, float* %b.elt1, align 4
  %res1 = tail call float @llvm.copysign.f32(float %mag1, float %sgn1) nounwind readnone
  %c.elt1 = getelementptr inbounds float, float* %c, i64 1
  store float %res1, float* %c.elt1, align 4

  ; lane 2
  %a.elt2 = getelementptr inbounds float, float* %a, i64 2
  %mag2 = load float, float* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds float, float* %b, i64 2
  %sgn2 = load float, float* %b.elt2, align 4
  %res2 = tail call float @llvm.copysign.f32(float %mag2, float %sgn2) nounwind readnone
  %c.elt2 = getelementptr inbounds float, float* %c, i64 2
  store float %res2, float* %c.elt2, align 4

  ; lane 3
  %a.elt3 = getelementptr inbounds float, float* %a, i64 3
  %mag3 = load float, float* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds float, float* %b, i64 3
  %sgn3 = load float, float* %b.elt3, align 4
  %res3 = tail call float @llvm.copysign.f32(float %mag3, float %sgn3) nounwind readnone
  %c.elt3 = getelementptr inbounds float, float* %c, i64 3
  store float %res3, float* %c.elt3, align 4

  ret void
}

declare i32 @llvm.bswap.i32(i32) nounwind readnone

; Four lanes of c[i] = bswap(a[i] + b[i]) on i32.
; Expected: two <4 x i32> loads, one llvm.bswap.v4i32 call, one vector store.
; CHECK-LABEL: @vec_bswap_i32(
; CHECK: load <4 x i32>
; CHECK: load <4 x i32>
; CHECK: call <4 x i32> @llvm.bswap.v4i32
; CHECK: store <4 x i32>
; CHECK: ret
define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) {
entry:
  ; lane 0
  %lhs0 = load i32, i32* %a, align 4
  %rhs0 = load i32, i32* %b, align 4
  %sum0 = add i32 %lhs0, %rhs0
  %res0 = tail call i32 @llvm.bswap.i32(i32 %sum0) nounwind readnone

  ; lane 1
  %a.elt1 = getelementptr inbounds i32, i32* %a, i32 1
  %lhs1 = load i32, i32* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds i32, i32* %b, i32 1
  %rhs1 = load i32, i32* %b.elt1, align 4
  %sum1 = add i32 %lhs1, %rhs1
  %res1 = tail call i32 @llvm.bswap.i32(i32 %sum1) nounwind readnone

  ; lane 2
  %a.elt2 = getelementptr inbounds i32, i32* %a, i32 2
  %lhs2 = load i32, i32* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds i32, i32* %b, i32 2
  %rhs2 = load i32, i32* %b.elt2, align 4
  %sum2 = add i32 %lhs2, %rhs2
  %res2 = tail call i32 @llvm.bswap.i32(i32 %sum2) nounwind readnone

  ; lane 3
  %a.elt3 = getelementptr inbounds i32, i32* %a, i32 3
  %lhs3 = load i32, i32* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds i32, i32* %b, i32 3
  %rhs3 = load i32, i32* %b.elt3, align 4
  %sum3 = add i32 %lhs3, %rhs3
  %res3 = tail call i32 @llvm.bswap.i32(i32 %sum3) nounwind readnone

  ; write back all four lanes
  store i32 %res0, i32* %c, align 4
  %c.elt1 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %res1, i32* %c.elt1, align 4
  %c.elt2 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %res2, i32* %c.elt2, align 4
  %c.elt3 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %res3, i32* %c.elt3, align 4
  ret void
}

declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone

; Four lanes of c[i] = ctlz(a[i] + b[i], true) on i32. Every lane passes the
; same i1 flag (true). The trailing unnamed i1 parameter is not used by the body.
; Expected: two <4 x i32> loads, one llvm.ctlz.v4i32 call, one vector store.
; CHECK-LABEL: @vec_ctlz_i32(
; CHECK: load <4 x i32>
; CHECK: load <4 x i32>
; CHECK: call <4 x i32> @llvm.ctlz.v4i32
; CHECK: store <4 x i32>
; CHECK: ret
define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
entry:
  ; lane 0
  %lhs0 = load i32, i32* %a, align 4
  %rhs0 = load i32, i32* %b, align 4
  %sum0 = add i32 %lhs0, %rhs0
  %res0 = tail call i32 @llvm.ctlz.i32(i32 %sum0, i1 true) nounwind readnone

  ; lane 1
  %a.elt1 = getelementptr inbounds i32, i32* %a, i32 1
  %lhs1 = load i32, i32* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds i32, i32* %b, i32 1
  %rhs1 = load i32, i32* %b.elt1, align 4
  %sum1 = add i32 %lhs1, %rhs1
  %res1 = tail call i32 @llvm.ctlz.i32(i32 %sum1, i1 true) nounwind readnone

  ; lane 2
  %a.elt2 = getelementptr inbounds i32, i32* %a, i32 2
  %lhs2 = load i32, i32* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds i32, i32* %b, i32 2
  %rhs2 = load i32, i32* %b.elt2, align 4
  %sum2 = add i32 %lhs2, %rhs2
  %res2 = tail call i32 @llvm.ctlz.i32(i32 %sum2, i1 true) nounwind readnone

  ; lane 3
  %a.elt3 = getelementptr inbounds i32, i32* %a, i32 3
  %lhs3 = load i32, i32* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds i32, i32* %b, i32 3
  %rhs3 = load i32, i32* %b.elt3, align 4
  %sum3 = add i32 %lhs3, %rhs3
  %res3 = tail call i32 @llvm.ctlz.i32(i32 %sum3, i1 true) nounwind readnone

  ; write back all four lanes
  store i32 %res0, i32* %c, align 4
  %c.elt1 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %res1, i32* %c.elt1, align 4
  %c.elt2 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %res2, i32* %c.elt2, align 4
  %c.elt3 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %res3, i32* %c.elt3, align 4
  ret void
}

; Negative variant of the ctlz test: the i1 flag alternates true/false/true/false
; across the four lanes, so the lanes do not agree on the second operand.
; Expected: no <4 x i32> llvm.ctlz call appears in this function's output.
; CHECK-LABEL: @vec_ctlz_i32_neg(
; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
entry:
  ; lane 0 (flag: true)
  %lhs0 = load i32, i32* %a, align 4
  %rhs0 = load i32, i32* %b, align 4
  %sum0 = add i32 %lhs0, %rhs0
  %res0 = tail call i32 @llvm.ctlz.i32(i32 %sum0, i1 true) nounwind readnone

  ; lane 1 (flag: false)
  %a.elt1 = getelementptr inbounds i32, i32* %a, i32 1
  %lhs1 = load i32, i32* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds i32, i32* %b, i32 1
  %rhs1 = load i32, i32* %b.elt1, align 4
  %sum1 = add i32 %lhs1, %rhs1
  %res1 = tail call i32 @llvm.ctlz.i32(i32 %sum1, i1 false) nounwind readnone

  ; lane 2 (flag: true)
  %a.elt2 = getelementptr inbounds i32, i32* %a, i32 2
  %lhs2 = load i32, i32* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds i32, i32* %b, i32 2
  %rhs2 = load i32, i32* %b.elt2, align 4
  %sum2 = add i32 %lhs2, %rhs2
  %res2 = tail call i32 @llvm.ctlz.i32(i32 %sum2, i1 true) nounwind readnone

  ; lane 3 (flag: false)
  %a.elt3 = getelementptr inbounds i32, i32* %a, i32 3
  %lhs3 = load i32, i32* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds i32, i32* %b, i32 3
  %rhs3 = load i32, i32* %b.elt3, align 4
  %sum3 = add i32 %lhs3, %rhs3
  %res3 = tail call i32 @llvm.ctlz.i32(i32 %sum3, i1 false) nounwind readnone

  ; write back all four lanes
  store i32 %res0, i32* %c, align 4
  %c.elt1 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %res1, i32* %c.elt1, align 4
  %c.elt2 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %res2, i32* %c.elt2, align 4
  %c.elt3 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %res3, i32* %c.elt3, align 4
  ret void
}


declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone

; Four lanes of c[i] = cttz(a[i] + b[i], true) on i32. Every lane passes the
; same i1 flag (true). The trailing unnamed i1 parameter is not used by the body.
; Expected: two <4 x i32> loads, one llvm.cttz.v4i32 call, one vector store.
; CHECK-LABEL: @vec_cttz_i32(
; CHECK: load <4 x i32>
; CHECK: load <4 x i32>
; CHECK: call <4 x i32> @llvm.cttz.v4i32
; CHECK: store <4 x i32>
; CHECK: ret
define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
entry:
  ; lane 0
  %lhs0 = load i32, i32* %a, align 4
  %rhs0 = load i32, i32* %b, align 4
  %sum0 = add i32 %lhs0, %rhs0
  %res0 = tail call i32 @llvm.cttz.i32(i32 %sum0, i1 true) nounwind readnone

  ; lane 1
  %a.elt1 = getelementptr inbounds i32, i32* %a, i32 1
  %lhs1 = load i32, i32* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds i32, i32* %b, i32 1
  %rhs1 = load i32, i32* %b.elt1, align 4
  %sum1 = add i32 %lhs1, %rhs1
  %res1 = tail call i32 @llvm.cttz.i32(i32 %sum1, i1 true) nounwind readnone

  ; lane 2
  %a.elt2 = getelementptr inbounds i32, i32* %a, i32 2
  %lhs2 = load i32, i32* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds i32, i32* %b, i32 2
  %rhs2 = load i32, i32* %b.elt2, align 4
  %sum2 = add i32 %lhs2, %rhs2
  %res2 = tail call i32 @llvm.cttz.i32(i32 %sum2, i1 true) nounwind readnone

  ; lane 3
  %a.elt3 = getelementptr inbounds i32, i32* %a, i32 3
  %lhs3 = load i32, i32* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds i32, i32* %b, i32 3
  %rhs3 = load i32, i32* %b.elt3, align 4
  %sum3 = add i32 %lhs3, %rhs3
  %res3 = tail call i32 @llvm.cttz.i32(i32 %sum3, i1 true) nounwind readnone

  ; write back all four lanes
  store i32 %res0, i32* %c, align 4
  %c.elt1 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %res1, i32* %c.elt1, align 4
  %c.elt2 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %res2, i32* %c.elt2, align 4
  %c.elt3 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %res3, i32* %c.elt3, align 4
  ret void
}

; Negative variant of the cttz test: the i1 flag alternates true/false/true/false
; across the four lanes, so the lanes do not agree on the second operand.
; Expected: no <4 x i32> llvm.cttz call appears in this function's output.
; CHECK-LABEL: @vec_cttz_i32_neg(
; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
entry:
  ; lane 0 (flag: true)
  %lhs0 = load i32, i32* %a, align 4
  %rhs0 = load i32, i32* %b, align 4
  %sum0 = add i32 %lhs0, %rhs0
  %res0 = tail call i32 @llvm.cttz.i32(i32 %sum0, i1 true) nounwind readnone

  ; lane 1 (flag: false)
  %a.elt1 = getelementptr inbounds i32, i32* %a, i32 1
  %lhs1 = load i32, i32* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds i32, i32* %b, i32 1
  %rhs1 = load i32, i32* %b.elt1, align 4
  %sum1 = add i32 %lhs1, %rhs1
  %res1 = tail call i32 @llvm.cttz.i32(i32 %sum1, i1 false) nounwind readnone

  ; lane 2 (flag: true)
  %a.elt2 = getelementptr inbounds i32, i32* %a, i32 2
  %lhs2 = load i32, i32* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds i32, i32* %b, i32 2
  %rhs2 = load i32, i32* %b.elt2, align 4
  %sum2 = add i32 %lhs2, %rhs2
  %res2 = tail call i32 @llvm.cttz.i32(i32 %sum2, i1 true) nounwind readnone

  ; lane 3 (flag: false)
  %a.elt3 = getelementptr inbounds i32, i32* %a, i32 3
  %lhs3 = load i32, i32* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds i32, i32* %b, i32 3
  %rhs3 = load i32, i32* %b.elt3, align 4
  %sum3 = add i32 %lhs3, %rhs3
  %res3 = tail call i32 @llvm.cttz.i32(i32 %sum3, i1 false) nounwind readnone

  ; write back all four lanes
  store i32 %res0, i32* %c, align 4
  %c.elt1 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %res1, i32* %c.elt1, align 4
  %c.elt2 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %res2, i32* %c.elt2, align 4
  %c.elt3 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %res3, i32* %c.elt3, align 4
  ret void
}


declare float @llvm.powi.f32(float, i32)

; Four lanes of c[i] = powi(a[i] + b[i], P) on floats. Every lane uses the same
; exponent argument %P.
; Expected: two <4 x float> loads, one llvm.powi.v4f32 call, one vector store.
; CHECK-LABEL: @vec_powi_f32(
; CHECK: load <4 x float>
; CHECK: load <4 x float>
; CHECK: call <4 x float> @llvm.powi.v4f32
; CHECK: store <4 x float>
; CHECK: ret
define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
entry:
  ; lane 0
  %lhs0 = load float, float* %a, align 4
  %rhs0 = load float, float* %b, align 4
  %sum0 = fadd float %lhs0, %rhs0
  %res0 = tail call float @llvm.powi.f32(float %sum0, i32 %P) nounwind readnone

  ; lane 1
  %a.elt1 = getelementptr inbounds float, float* %a, i32 1
  %lhs1 = load float, float* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds float, float* %b, i32 1
  %rhs1 = load float, float* %b.elt1, align 4
  %sum1 = fadd float %lhs1, %rhs1
  %res1 = tail call float @llvm.powi.f32(float %sum1, i32 %P) nounwind readnone

  ; lane 2
  %a.elt2 = getelementptr inbounds float, float* %a, i32 2
  %lhs2 = load float, float* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds float, float* %b, i32 2
  %rhs2 = load float, float* %b.elt2, align 4
  %sum2 = fadd float %lhs2, %rhs2
  %res2 = tail call float @llvm.powi.f32(float %sum2, i32 %P) nounwind readnone

  ; lane 3
  %a.elt3 = getelementptr inbounds float, float* %a, i32 3
  %lhs3 = load float, float* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds float, float* %b, i32 3
  %rhs3 = load float, float* %b.elt3, align 4
  %sum3 = fadd float %lhs3, %rhs3
  %res3 = tail call float @llvm.powi.f32(float %sum3, i32 %P) nounwind readnone

  ; write back all four lanes
  store float %res0, float* %c, align 4
  %c.elt1 = getelementptr inbounds float, float* %c, i32 1
  store float %res1, float* %c.elt1, align 4
  %c.elt2 = getelementptr inbounds float, float* %c, i32 2
  store float %res2, float* %c.elt2, align 4
  %c.elt3 = getelementptr inbounds float, float* %c, i32 3
  store float %res3, float* %c.elt3, align 4
  ret void
}


; Negative variant of the powi test: the exponent alternates %P/%Q/%P/%Q across
; the four lanes, so the lanes do not agree on the second operand.
; Expected: no <4 x float> llvm.powi call appears in this function's output.
; CHECK-LABEL: @vec_powi_f32_neg(
; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
entry:
  ; lane 0 (exponent: %P)
  %lhs0 = load float, float* %a, align 4
  %rhs0 = load float, float* %b, align 4
  %sum0 = fadd float %lhs0, %rhs0
  %res0 = tail call float @llvm.powi.f32(float %sum0, i32 %P) nounwind readnone

  ; lane 1 (exponent: %Q)
  %a.elt1 = getelementptr inbounds float, float* %a, i32 1
  %lhs1 = load float, float* %a.elt1, align 4
  %b.elt1 = getelementptr inbounds float, float* %b, i32 1
  %rhs1 = load float, float* %b.elt1, align 4
  %sum1 = fadd float %lhs1, %rhs1
  %res1 = tail call float @llvm.powi.f32(float %sum1, i32 %Q) nounwind readnone

  ; lane 2 (exponent: %P)
  %a.elt2 = getelementptr inbounds float, float* %a, i32 2
  %lhs2 = load float, float* %a.elt2, align 4
  %b.elt2 = getelementptr inbounds float, float* %b, i32 2
  %rhs2 = load float, float* %b.elt2, align 4
  %sum2 = fadd float %lhs2, %rhs2
  %res2 = tail call float @llvm.powi.f32(float %sum2, i32 %P) nounwind readnone

  ; lane 3 (exponent: %Q)
  %a.elt3 = getelementptr inbounds float, float* %a, i32 3
  %lhs3 = load float, float* %a.elt3, align 4
  %b.elt3 = getelementptr inbounds float, float* %b, i32 3
  %rhs3 = load float, float* %b.elt3, align 4
  %sum3 = fadd float %lhs3, %rhs3
  %res3 = tail call float @llvm.powi.f32(float %sum3, i32 %Q) nounwind readnone

  ; write back all four lanes
  store float %res0, float* %c, align 4
  %c.elt1 = getelementptr inbounds float, float* %c, i32 1
  store float %res1, float* %c.elt1, align 4
  %c.elt2 = getelementptr inbounds float, float* %c, i32 2
  store float %res2, float* %c.elt2, align 4
  %c.elt3 = getelementptr inbounds float, float* %c, i32 3
  store float %res3, float* %c.elt3, align 4
  ret void
}