; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; Codegen test for 256-bit arithmetic when only +avx is enabled.
;  * Floating-point add/sub/mul/div on <4 x double> / <8 x float> are expected
;    to select a single ymm instruction; the "*fold" variants expect the
;    second operand to be folded into the instruction as a memory operand.
;  * Integer add/sub/mul on 256-bit vectors are expected to be split into two
;    128-bit operations (vextractf128 ... vinsertf128).
;  * Scalar and vector sqrt are expected to select vsqrtss/vsqrtsd/vsqrtpd.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; --- 256-bit floating-point add ---------------------------------------------

define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: addpd256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

; Constant-vector operand: expected to be read through a RIP-relative memory operand.
define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: addpd256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %add.i
}

define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: addps256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x float> %x, %y
  ret <8 x float> %add.i
}

define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: addps256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %add.i
}

; --- 256-bit floating-point sub ---------------------------------------------

define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: subpd256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsubpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x double> %x, %y
  ret <4 x double> %sub.i
}

; The subtrahend is loaded from %x (align 32); the load is expected to be
; folded into the vsubpd memory operand.
define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
; CHECK-LABEL: subpd256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsubpd (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
  %sub.i = fsub <4 x double> %y, %tmp2
  ret <4 x double> %sub.i
}

define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: subps256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsubps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x float> %x, %y
  ret <8 x float> %sub.i
}

; Same as subpd256fold, for <8 x float>.
define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
; CHECK-LABEL: subps256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsubps (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
  %sub.i = fsub <8 x float> %y, %tmp2
  ret <8 x float> %sub.i
}

; --- 256-bit floating-point mul ---------------------------------------------

define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: mulpd256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vmulpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <4 x double> %x, %y
  ret <4 x double> %mul.i
}

define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: mulpd256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %mul.i
}

define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: mulps256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vmulps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x float> %x, %y
  ret <8 x float> %mul.i
}

define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: mulps256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %mul.i
}

; --- 256-bit floating-point div ---------------------------------------------

define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: divpd256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vdivpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <4 x double> %x, %y
  ret <4 x double> %div.i
}

define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: divpd256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %div.i
}

define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: divps256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vdivps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x float> %x, %y
  ret <8 x float> %div.i
}

define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: divps256fold:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %div.i
}

; --- Scalar sqrt via libm calls ---------------------------------------------
; Calls to @sqrtf / @sqrt marked readnone are expected to become
; vsqrtss / vsqrtsd rather than a library call.

define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone

define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @sqrtf(float) readnone

; --- 256-bit integer add ----------------------------------------------------
; Each integer test below expects the upper 128-bit halves to be extracted,
; the operation performed per half on xmm registers, and the halves
; recombined with vinsertf128.

define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; --- 256-bit integer sub ----------------------------------------------------

define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; --- 256-bit integer mul ----------------------------------------------------

define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpmulld:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpmulld %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpmullw:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; The 64-bit element multiply is expected to be expanded, per 128-bit half,
; into a sequence of vpmuludq / vpsrlq / vpsllq / vpaddq instructions.
define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: mul_v4i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpsrlq $32, %xmm3, %xmm4
; CHECK-NEXT:    vpmuludq %xmm2, %xmm4, %xmm4
; CHECK-NEXT:    vpsrlq $32, %xmm2, %xmm5
; CHECK-NEXT:    vpmuludq %xmm5, %xmm3, %xmm5
; CHECK-NEXT:    vpaddq %xmm4, %xmm5, %xmm4
; CHECK-NEXT:    vpsllq $32, %xmm4, %xmm4
; CHECK-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
; CHECK-NEXT:    vpsrlq $32, %xmm0, %xmm3
; CHECK-NEXT:    vpmuludq %xmm1, %xmm3, %xmm3
; CHECK-NEXT:    vpsrlq $32, %xmm1, %xmm4
; CHECK-NEXT:    vpmuludq %xmm4, %xmm0, %xmm4
; CHECK-NEXT:    vpaddq %xmm3, %xmm4, %xmm3
; CHECK-NEXT:    vpsllq $32, %xmm3, %xmm3
; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

; --- sqrt with a scalar load feeding a vector operand -----------------------

declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

; The scalar is loaded from an undef pointer in addrspace(1) and inserted into
; lane 0; the expected output keeps a separate vmovss load followed by a
; register-register vsqrtss.
define <4 x float> @int_sqrt_ss() {
; CHECK-LABEL: int_sqrt_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x0 = load float, float addrspace(1)* undef, align 8
  %x1 = insertelement <4 x float> undef, float %x0, i32 0
  %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
  ret <4 x float> %x2
}

; Under optsize, the expected output still performs the 8-byte scalar load
; with vmovsd and then a register-register vsqrtpd (the scalar load is not
; folded into the 128-bit vsqrtpd memory operand).
define <2 x double> @vector_sqrt_scalar_load(double* %a0) optsize {
; CHECK-LABEL: vector_sqrt_scalar_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load double, double* %a0
  %a2 = insertelement <2 x double> undef, double %a1, i32 0
  %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone