; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sqrt_ss(<4 x float> %a) {
; SSE2-LABEL: test_sqrt_ss:
; SSE2: # BB#0:
; SSE2-NEXT: sqrtss %xmm0, %xmm1
; SSE2-NEXT: movss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_sqrt_ss:
; SSE41: # BB#0:
; SSE41-NEXT: sqrtss %xmm0, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test_sqrt_ss:
; AVX: # BB#0:
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = call float @llvm.sqrt.f32(float %1)
  %3 = insertelement <4 x float> %a, float %2, i32 0
  ret <4 x float> %3
}
declare float @llvm.sqrt.f32(float)
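; The same checks, repeated for the double-precision (SD) forms.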
define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sqrt_sd(<2 x double> %a) {
; SSE-LABEL: test_sqrt_sd:
; SSE: # BB#0:
; SSE-NEXT: sqrtsd %xmm0, %xmm1
; SSE-NEXT: movsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_sqrt_sd:
; AVX: # BB#0:
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = call double @llvm.sqrt.f64(double %1)
  %3 = insertelement <2 x double> %a, double %2, i32 0
  ret <2 x double> %3
}
declare double @llvm.sqrt.f64(double)
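; In the test2 variants the operands are commuted: the scalar result is
; inserted into %b, so the SSE lowering also needs a register move.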
define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}
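; Each test below chains two scalar operations and inserts only the final
; result back into the vector, so two scalar instructions are expected and
; still no redundant vector insert.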
define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: divss %xmm1, %xmm2
; SSE-NEXT: divss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.
define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}
define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; SSE-LABEL: blend_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
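; Commuted forms of the insert_test cases: the shuffle takes its upper
; elements from %b, so the SSE versions also need a register move.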
define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
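; Here the insertion is expressed as a vector select instead of a
; shufflevector; the backend should still select a single scalar instruction.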
define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}
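; Commuted forms of the insert_test3 cases: the select keeps the upper
; elements of %b.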
define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE: # BB#0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX: # BB#0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE: # BB#0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX: # BB#0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}