; (file header) LLVM CodeGen/X86 regression test — scalar SSE/AVX fp instruction selection.
      1 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
      2 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
      3 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
      4 
      5 target triple = "x86_64-unknown-unknown"
      6 
      7 ; Ensure that the backend no longer emits unnecessary vector insert
      8 ; instructions immediately after SSE scalar fp instructions
      9 ; like addss or mulss.
     10 
     11 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
     12 ; SSE-LABEL: test_add_ss:
     13 ; SSE:       # BB#0:
     14 ; SSE-NEXT:    addss %xmm1, %xmm0
     15 ; SSE-NEXT:    retq
     16 ;
     17 ; AVX-LABEL: test_add_ss:
     18 ; AVX:       # BB#0:
     19 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
     20 ; AVX-NEXT:    retq
     21   %1 = extractelement <4 x float> %b, i32 0
     22   %2 = extractelement <4 x float> %a, i32 0
     23   %add = fadd float %2, %1
     24   %3 = insertelement <4 x float> %a, float %add, i32 0
     25   ret <4 x float> %3
     26 }
     27 
     28 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
     29 ; SSE-LABEL: test_sub_ss:
     30 ; SSE:       # BB#0:
     31 ; SSE-NEXT:    subss %xmm1, %xmm0
     32 ; SSE-NEXT:    retq
     33 ;
     34 ; AVX-LABEL: test_sub_ss:
     35 ; AVX:       # BB#0:
     36 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
     37 ; AVX-NEXT:    retq
     38   %1 = extractelement <4 x float> %b, i32 0
     39   %2 = extractelement <4 x float> %a, i32 0
     40   %sub = fsub float %2, %1
     41   %3 = insertelement <4 x float> %a, float %sub, i32 0
     42   ret <4 x float> %3
     43 }
     44 
     45 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
     46 ; SSE-LABEL: test_mul_ss:
     47 ; SSE:       # BB#0:
     48 ; SSE-NEXT:    mulss %xmm1, %xmm0
     49 ; SSE-NEXT:    retq
     50 ;
     51 ; AVX-LABEL: test_mul_ss:
     52 ; AVX:       # BB#0:
     53 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
     54 ; AVX-NEXT:    retq
     55   %1 = extractelement <4 x float> %b, i32 0
     56   %2 = extractelement <4 x float> %a, i32 0
     57   %mul = fmul float %2, %1
     58   %3 = insertelement <4 x float> %a, float %mul, i32 0
     59   ret <4 x float> %3
     60 }
     61 
     62 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
     63 ; SSE-LABEL: test_div_ss:
     64 ; SSE:       # BB#0:
     65 ; SSE-NEXT:    divss %xmm1, %xmm0
     66 ; SSE-NEXT:    retq
     67 ;
     68 ; AVX-LABEL: test_div_ss:
     69 ; AVX:       # BB#0:
     70 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
     71 ; AVX-NEXT:    retq
     72   %1 = extractelement <4 x float> %b, i32 0
     73   %2 = extractelement <4 x float> %a, i32 0
     74   %div = fdiv float %2, %1
     75   %3 = insertelement <4 x float> %a, float %div, i32 0
     76   ret <4 x float> %3
     77 }
     78 
     79 define <4 x float> @test_sqrt_ss(<4 x float> %a) {
     80 ; SSE2-LABEL: test_sqrt_ss:
     81 ; SSE2:       # BB#0:
     82 ; SSE2-NEXT:   sqrtss %xmm0, %xmm1
     83 ; SSE2-NEXT:   movss %xmm1, %xmm0
     84 ; SSE2-NEXT:   retq
     85 ;
     86 ; SSE41-LABEL: test_sqrt_ss:
     87 ; SSE41:       # BB#0:
     88 ; SSE41-NEXT:  sqrtss %xmm0, %xmm1
     89 ; SSE41-NEXT:  blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
     90 ; SSE41-NEXT:  retq
     91 ;
     92 ; AVX-LABEL: test_sqrt_ss:
     93 ; AVX:       # BB#0:
     94 ; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm1
     95 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
     96 ; AVX-NEXT:    retq
     97   %1 = extractelement <4 x float> %a, i32 0
     98   %2 = call float @llvm.sqrt.f32(float %1)
     99   %3 = insertelement <4 x float> %a, float %2, i32 0
    100   ret <4 x float> %3
    101 }
    102 declare float @llvm.sqrt.f32(float)
    103 
    104 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
    105 ; SSE-LABEL: test_add_sd:
    106 ; SSE:       # BB#0:
    107 ; SSE-NEXT:    addsd %xmm1, %xmm0
    108 ; SSE-NEXT:    retq
    109 ;
    110 ; AVX-LABEL: test_add_sd:
    111 ; AVX:       # BB#0:
    112 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    113 ; AVX-NEXT:    retq
    114   %1 = extractelement <2 x double> %b, i32 0
    115   %2 = extractelement <2 x double> %a, i32 0
    116   %add = fadd double %2, %1
    117   %3 = insertelement <2 x double> %a, double %add, i32 0
    118   ret <2 x double> %3
    119 }
    120 
    121 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
    122 ; SSE-LABEL: test_sub_sd:
    123 ; SSE:       # BB#0:
    124 ; SSE-NEXT:    subsd %xmm1, %xmm0
    125 ; SSE-NEXT:    retq
    126 ;
    127 ; AVX-LABEL: test_sub_sd:
    128 ; AVX:       # BB#0:
    129 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
    130 ; AVX-NEXT:    retq
    131   %1 = extractelement <2 x double> %b, i32 0
    132   %2 = extractelement <2 x double> %a, i32 0
    133   %sub = fsub double %2, %1
    134   %3 = insertelement <2 x double> %a, double %sub, i32 0
    135   ret <2 x double> %3
    136 }
    137 
    138 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
    139 ; SSE-LABEL: test_mul_sd:
    140 ; SSE:       # BB#0:
    141 ; SSE-NEXT:    mulsd %xmm1, %xmm0
    142 ; SSE-NEXT:    retq
    143 ;
    144 ; AVX-LABEL: test_mul_sd:
    145 ; AVX:       # BB#0:
    146 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
    147 ; AVX-NEXT:    retq
    148   %1 = extractelement <2 x double> %b, i32 0
    149   %2 = extractelement <2 x double> %a, i32 0
    150   %mul = fmul double %2, %1
    151   %3 = insertelement <2 x double> %a, double %mul, i32 0
    152   ret <2 x double> %3
    153 }
    154 
    155 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
    156 ; SSE-LABEL: test_div_sd:
    157 ; SSE:       # BB#0:
    158 ; SSE-NEXT:    divsd %xmm1, %xmm0
    159 ; SSE-NEXT:    retq
    160 ;
    161 ; AVX-LABEL: test_div_sd:
    162 ; AVX:       # BB#0:
    163 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
    164 ; AVX-NEXT:    retq
    165   %1 = extractelement <2 x double> %b, i32 0
    166   %2 = extractelement <2 x double> %a, i32 0
    167   %div = fdiv double %2, %1
    168   %3 = insertelement <2 x double> %a, double %div, i32 0
    169   ret <2 x double> %3
    170 }
    171 
    172 define <2 x double> @test_sqrt_sd(<2 x double> %a) {
    173 ; SSE-LABEL: test_sqrt_sd:
    174 ; SSE:       # BB#0:
    175 ; SSE-NEXT:    sqrtsd %xmm0, %xmm1
    176 ; SSE-NEXT:    movsd %xmm1, %xmm0
    177 ; SSE-NEXT:    retq
    178 ;
    179 ; AVX-LABEL: test_sqrt_sd:
    180 ; AVX:       # BB#0:
    181 ; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm1
    182 ; AVX-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
    183 ; AVX-NEXT:    retq
    184   %1 = extractelement <2 x double> %a, i32 0
    185   %2 = call double @llvm.sqrt.f64(double %1)
    186   %3 = insertelement <2 x double> %a, double %2, i32 0
    187   ret <2 x double> %3
    188 }
    189 declare double @llvm.sqrt.f64(double)
    190 
    191 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
    192 ; SSE-LABEL: test2_add_ss:
    193 ; SSE:       # BB#0:
    194 ; SSE-NEXT:    addss %xmm0, %xmm1
    195 ; SSE-NEXT:    movaps %xmm1, %xmm0
    196 ; SSE-NEXT:    retq
    197 ;
    198 ; AVX-LABEL: test2_add_ss:
    199 ; AVX:       # BB#0:
    200 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
    201 ; AVX-NEXT:    retq
    202   %1 = extractelement <4 x float> %a, i32 0
    203   %2 = extractelement <4 x float> %b, i32 0
    204   %add = fadd float %1, %2
    205   %3 = insertelement <4 x float> %b, float %add, i32 0
    206   ret <4 x float> %3
    207 }
    208 
    209 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
    210 ; SSE-LABEL: test2_sub_ss:
    211 ; SSE:       # BB#0:
    212 ; SSE-NEXT:    subss %xmm0, %xmm1
    213 ; SSE-NEXT:    movaps %xmm1, %xmm0
    214 ; SSE-NEXT:    retq
    215 ;
    216 ; AVX-LABEL: test2_sub_ss:
    217 ; AVX:       # BB#0:
    218 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
    219 ; AVX-NEXT:    retq
    220   %1 = extractelement <4 x float> %a, i32 0
    221   %2 = extractelement <4 x float> %b, i32 0
    222   %sub = fsub float %2, %1
    223   %3 = insertelement <4 x float> %b, float %sub, i32 0
    224   ret <4 x float> %3
    225 }
    226 
    227 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
    228 ; SSE-LABEL: test2_mul_ss:
    229 ; SSE:       # BB#0:
    230 ; SSE-NEXT:    mulss %xmm0, %xmm1
    231 ; SSE-NEXT:    movaps %xmm1, %xmm0
    232 ; SSE-NEXT:    retq
    233 ;
    234 ; AVX-LABEL: test2_mul_ss:
    235 ; AVX:       # BB#0:
    236 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
    237 ; AVX-NEXT:    retq
    238   %1 = extractelement <4 x float> %a, i32 0
    239   %2 = extractelement <4 x float> %b, i32 0
    240   %mul = fmul float %1, %2
    241   %3 = insertelement <4 x float> %b, float %mul, i32 0
    242   ret <4 x float> %3
    243 }
    244 
    245 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
    246 ; SSE-LABEL: test2_div_ss:
    247 ; SSE:       # BB#0:
    248 ; SSE-NEXT:    divss %xmm0, %xmm1
    249 ; SSE-NEXT:    movaps %xmm1, %xmm0
    250 ; SSE-NEXT:    retq
    251 ;
    252 ; AVX-LABEL: test2_div_ss:
    253 ; AVX:       # BB#0:
    254 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
    255 ; AVX-NEXT:    retq
    256   %1 = extractelement <4 x float> %a, i32 0
    257   %2 = extractelement <4 x float> %b, i32 0
    258   %div = fdiv float %2, %1
    259   %3 = insertelement <4 x float> %b, float %div, i32 0
    260   ret <4 x float> %3
    261 }
    262 
    263 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
    264 ; SSE-LABEL: test2_add_sd:
    265 ; SSE:       # BB#0:
    266 ; SSE-NEXT:    addsd %xmm0, %xmm1
    267 ; SSE-NEXT:    movapd %xmm1, %xmm0
    268 ; SSE-NEXT:    retq
    269 ;
    270 ; AVX-LABEL: test2_add_sd:
    271 ; AVX:       # BB#0:
    272 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
    273 ; AVX-NEXT:    retq
    274   %1 = extractelement <2 x double> %a, i32 0
    275   %2 = extractelement <2 x double> %b, i32 0
    276   %add = fadd double %1, %2
    277   %3 = insertelement <2 x double> %b, double %add, i32 0
    278   ret <2 x double> %3
    279 }
    280 
    281 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
    282 ; SSE-LABEL: test2_sub_sd:
    283 ; SSE:       # BB#0:
    284 ; SSE-NEXT:    subsd %xmm0, %xmm1
    285 ; SSE-NEXT:    movapd %xmm1, %xmm0
    286 ; SSE-NEXT:    retq
    287 ;
    288 ; AVX-LABEL: test2_sub_sd:
    289 ; AVX:       # BB#0:
    290 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
    291 ; AVX-NEXT:    retq
    292   %1 = extractelement <2 x double> %a, i32 0
    293   %2 = extractelement <2 x double> %b, i32 0
    294   %sub = fsub double %2, %1
    295   %3 = insertelement <2 x double> %b, double %sub, i32 0
    296   ret <2 x double> %3
    297 }
    298 
    299 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
    300 ; SSE-LABEL: test2_mul_sd:
    301 ; SSE:       # BB#0:
    302 ; SSE-NEXT:    mulsd %xmm0, %xmm1
    303 ; SSE-NEXT:    movapd %xmm1, %xmm0
    304 ; SSE-NEXT:    retq
    305 ;
    306 ; AVX-LABEL: test2_mul_sd:
    307 ; AVX:       # BB#0:
    308 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
    309 ; AVX-NEXT:    retq
    310   %1 = extractelement <2 x double> %a, i32 0
    311   %2 = extractelement <2 x double> %b, i32 0
    312   %mul = fmul double %1, %2
    313   %3 = insertelement <2 x double> %b, double %mul, i32 0
    314   ret <2 x double> %3
    315 }
    316 
    317 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
    318 ; SSE-LABEL: test2_div_sd:
    319 ; SSE:       # BB#0:
    320 ; SSE-NEXT:    divsd %xmm0, %xmm1
    321 ; SSE-NEXT:    movapd %xmm1, %xmm0
    322 ; SSE-NEXT:    retq
    323 ;
    324 ; AVX-LABEL: test2_div_sd:
    325 ; AVX:       # BB#0:
    326 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
    327 ; AVX-NEXT:    retq
    328   %1 = extractelement <2 x double> %a, i32 0
    329   %2 = extractelement <2 x double> %b, i32 0
    330   %div = fdiv double %2, %1
    331   %3 = insertelement <2 x double> %b, double %div, i32 0
    332   ret <2 x double> %3
    333 }
    334 
    335 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
    336 ; SSE-LABEL: test_multiple_add_ss:
    337 ; SSE:       # BB#0:
    338 ; SSE-NEXT:    addss %xmm0, %xmm1
    339 ; SSE-NEXT:    addss %xmm1, %xmm0
    340 ; SSE-NEXT:    retq
    341 ;
    342 ; AVX-LABEL: test_multiple_add_ss:
    343 ; AVX:       # BB#0:
    344 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
    345 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
    346 ; AVX-NEXT:    retq
    347   %1 = extractelement <4 x float> %b, i32 0
    348   %2 = extractelement <4 x float> %a, i32 0
    349   %add = fadd float %2, %1
    350   %add2 = fadd float %2, %add
    351   %3 = insertelement <4 x float> %a, float %add2, i32 0
    352   ret <4 x float> %3
    353 }
    354 
    355 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
    356 ; SSE-LABEL: test_multiple_sub_ss:
    357 ; SSE:       # BB#0:
    358 ; SSE-NEXT:    movaps %xmm0, %xmm2
    359 ; SSE-NEXT:    subss %xmm1, %xmm2
    360 ; SSE-NEXT:    subss %xmm2, %xmm0
    361 ; SSE-NEXT:    retq
    362 ;
    363 ; AVX-LABEL: test_multiple_sub_ss:
    364 ; AVX:       # BB#0:
    365 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
    366 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
    367 ; AVX-NEXT:    retq
    368   %1 = extractelement <4 x float> %b, i32 0
    369   %2 = extractelement <4 x float> %a, i32 0
    370   %sub = fsub float %2, %1
    371   %sub2 = fsub float %2, %sub
    372   %3 = insertelement <4 x float> %a, float %sub2, i32 0
    373   ret <4 x float> %3
    374 }
    375 
    376 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
    377 ; SSE-LABEL: test_multiple_mul_ss:
    378 ; SSE:       # BB#0:
    379 ; SSE-NEXT:    mulss %xmm0, %xmm1
    380 ; SSE-NEXT:    mulss %xmm1, %xmm0
    381 ; SSE-NEXT:    retq
    382 ;
    383 ; AVX-LABEL: test_multiple_mul_ss:
    384 ; AVX:       # BB#0:
    385 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
    386 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
    387 ; AVX-NEXT:    retq
    388   %1 = extractelement <4 x float> %b, i32 0
    389   %2 = extractelement <4 x float> %a, i32 0
    390   %mul = fmul float %2, %1
    391   %mul2 = fmul float %2, %mul
    392   %3 = insertelement <4 x float> %a, float %mul2, i32 0
    393   ret <4 x float> %3
    394 }
    395 
    396 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
    397 ; SSE-LABEL: test_multiple_div_ss:
    398 ; SSE:       # BB#0:
    399 ; SSE-NEXT:    movaps %xmm0, %xmm2
    400 ; SSE-NEXT:    divss %xmm1, %xmm2
    401 ; SSE-NEXT:    divss %xmm2, %xmm0
    402 ; SSE-NEXT:    retq
    403 ;
    404 ; AVX-LABEL: test_multiple_div_ss:
    405 ; AVX:       # BB#0:
    406 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
    407 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
    408 ; AVX-NEXT:    retq
    409   %1 = extractelement <4 x float> %b, i32 0
    410   %2 = extractelement <4 x float> %a, i32 0
    411   %div = fdiv float %2, %1
    412   %div2 = fdiv float %2, %div
    413   %3 = insertelement <4 x float> %a, float %div2, i32 0
    414   ret <4 x float> %3
    415 }
    416 
    417 ; With SSE4.1 or greater, the shuffles in the following tests may
    418 ; be lowered to X86Blendi nodes.
    419 
    420 define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
    421 ; SSE-LABEL: blend_add_ss:
    422 ; SSE:       # BB#0:
    423 ; SSE-NEXT:    addss %xmm1, %xmm0
    424 ; SSE-NEXT:    retq
    425 ;
    426 ; AVX-LABEL: blend_add_ss:
    427 ; AVX:       # BB#0:
    428 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
    429 ; AVX-NEXT:    retq
    430 
    431   %ext = extractelement <4 x float> %a, i32 0
    432   %op = fadd float %b, %ext
    433   %ins = insertelement <4 x float> undef, float %op, i32 0
    434   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    435   ret <4 x float> %shuf
    436 }
    437 
    438 define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
    439 ; SSE-LABEL: blend_sub_ss:
    440 ; SSE:       # BB#0:
    441 ; SSE-NEXT:    subss %xmm1, %xmm0
    442 ; SSE-NEXT:    retq
    443 ;
    444 ; AVX-LABEL: blend_sub_ss:
    445 ; AVX:       # BB#0:
    446 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
    447 ; AVX-NEXT:    retq
    448 
    449   %ext = extractelement <4 x float> %a, i32 0
    450   %op = fsub float %ext, %b
    451   %ins = insertelement <4 x float> undef, float %op, i32 0
    452   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    453   ret <4 x float> %shuf
    454 }
    455 
    456 define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
    457 ; SSE-LABEL: blend_mul_ss:
    458 ; SSE:       # BB#0:
    459 ; SSE-NEXT:    mulss %xmm1, %xmm0
    460 ; SSE-NEXT:    retq
    461 ;
    462 ; AVX-LABEL: blend_mul_ss:
    463 ; AVX:       # BB#0:
    464 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
    465 ; AVX-NEXT:    retq
    466 
    467   %ext = extractelement <4 x float> %a, i32 0
    468   %op = fmul float %b, %ext
    469   %ins = insertelement <4 x float> undef, float %op, i32 0
    470   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    471   ret <4 x float> %shuf
    472 }
    473 
    474 define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
    475 ; SSE-LABEL: blend_div_ss:
    476 ; SSE:       # BB#0:
    477 ; SSE-NEXT:    divss %xmm1, %xmm0
    478 ; SSE-NEXT:    retq
    479 ;
    480 ; AVX-LABEL: blend_div_ss:
    481 ; AVX:       # BB#0:
    482 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
    483 ; AVX-NEXT:    retq
    484 
    485   %ext = extractelement <4 x float> %a, i32 0
    486   %op = fdiv float %ext, %b
    487   %ins = insertelement <4 x float> undef, float %op, i32 0
    488   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    489   ret <4 x float> %shuf
    490 }
    491 
    492 define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
    493 ; SSE-LABEL: blend_add_sd:
    494 ; SSE:       # BB#0:
    495 ; SSE-NEXT:    addsd %xmm1, %xmm0
    496 ; SSE-NEXT:    retq
    497 ;
    498 ; AVX-LABEL: blend_add_sd:
    499 ; AVX:       # BB#0:
    500 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    501 ; AVX-NEXT:    retq
    502 
    503   %ext = extractelement <2 x double> %a, i32 0
    504   %op = fadd double %b, %ext
    505   %ins = insertelement <2 x double> undef, double %op, i32 0
    506   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    507   ret <2 x double> %shuf
    508 }
    509 
    510 define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
    511 ; SSE-LABEL: blend_sub_sd:
    512 ; SSE:       # BB#0:
    513 ; SSE-NEXT:    subsd %xmm1, %xmm0
    514 ; SSE-NEXT:    retq
    515 ;
    516 ; AVX-LABEL: blend_sub_sd:
    517 ; AVX:       # BB#0:
    518 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
    519 ; AVX-NEXT:    retq
    520 
    521   %ext = extractelement <2 x double> %a, i32 0
    522   %op = fsub double %ext, %b
    523   %ins = insertelement <2 x double> undef, double %op, i32 0
    524   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    525   ret <2 x double> %shuf
    526 }
    527 
    528 define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
    529 ; SSE-LABEL: blend_mul_sd:
    530 ; SSE:       # BB#0:
    531 ; SSE-NEXT:    mulsd %xmm1, %xmm0
    532 ; SSE-NEXT:    retq
    533 ;
    534 ; AVX-LABEL: blend_mul_sd:
    535 ; AVX:       # BB#0:
    536 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
    537 ; AVX-NEXT:    retq
    538 
    539   %ext = extractelement <2 x double> %a, i32 0
    540   %op = fmul double %b, %ext
    541   %ins = insertelement <2 x double> undef, double %op, i32 0
    542   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    543   ret <2 x double> %shuf
    544 }
    545 
    546 define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
    547 ; SSE-LABEL: blend_div_sd:
    548 ; SSE:       # BB#0:
    549 ; SSE-NEXT:    divsd %xmm1, %xmm0
    550 ; SSE-NEXT:    retq
    551 ;
    552 ; AVX-LABEL: blend_div_sd:
    553 ; AVX:       # BB#0:
    554 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
    555 ; AVX-NEXT:    retq
    556 
    557   %ext = extractelement <2 x double> %a, i32 0
    558   %op = fdiv double %ext, %b
    559   %ins = insertelement <2 x double> undef, double %op, i32 0
    560   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    561   ret <2 x double> %shuf
    562 }
    563 
    564 ; Ensure that the backend selects SSE/AVX scalar fp instructions
    565 ; from a packed fp instruction plus a vector insert.
    566 
    567 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
    568 ; SSE-LABEL: insert_test_add_ss:
    569 ; SSE:       # BB#0:
    570 ; SSE-NEXT:    addss %xmm1, %xmm0
    571 ; SSE-NEXT:    retq
    572 ;
    573 ; AVX-LABEL: insert_test_add_ss:
    574 ; AVX:       # BB#0:
    575 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
    576 ; AVX-NEXT:    retq
    577   %1 = fadd <4 x float> %a, %b
    578   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    579   ret <4 x float> %2
    580 }
    581 
    582 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
    583 ; SSE-LABEL: insert_test_sub_ss:
    584 ; SSE:       # BB#0:
    585 ; SSE-NEXT:    subss %xmm1, %xmm0
    586 ; SSE-NEXT:    retq
    587 ;
    588 ; AVX-LABEL: insert_test_sub_ss:
    589 ; AVX:       # BB#0:
    590 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
    591 ; AVX-NEXT:    retq
    592   %1 = fsub <4 x float> %a, %b
    593   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    594   ret <4 x float> %2
    595 }
    596 
    597 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
    598 ; SSE-LABEL: insert_test_mul_ss:
    599 ; SSE:       # BB#0:
    600 ; SSE-NEXT:    mulss %xmm1, %xmm0
    601 ; SSE-NEXT:    retq
    602 ;
    603 ; AVX-LABEL: insert_test_mul_ss:
    604 ; AVX:       # BB#0:
    605 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
    606 ; AVX-NEXT:    retq
    607   %1 = fmul <4 x float> %a, %b
    608   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    609   ret <4 x float> %2
    610 }
    611 
    612 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
    613 ; SSE-LABEL: insert_test_div_ss:
    614 ; SSE:       # BB#0:
    615 ; SSE-NEXT:    divss %xmm1, %xmm0
    616 ; SSE-NEXT:    retq
    617 ;
    618 ; AVX-LABEL: insert_test_div_ss:
    619 ; AVX:       # BB#0:
    620 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
    621 ; AVX-NEXT:    retq
    622   %1 = fdiv <4 x float> %a, %b
    623   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    624   ret <4 x float> %2
    625 }
    626 
    627 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
    628 ; SSE-LABEL: insert_test_add_sd:
    629 ; SSE:       # BB#0:
    630 ; SSE-NEXT:    addsd %xmm1, %xmm0
    631 ; SSE-NEXT:    retq
    632 ;
    633 ; AVX-LABEL: insert_test_add_sd:
    634 ; AVX:       # BB#0:
    635 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    636 ; AVX-NEXT:    retq
    637   %1 = fadd <2 x double> %a, %b
    638   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    639   ret <2 x double> %2
    640 }
    641 
    642 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
    643 ; SSE-LABEL: insert_test_sub_sd:
    644 ; SSE:       # BB#0:
    645 ; SSE-NEXT:    subsd %xmm1, %xmm0
    646 ; SSE-NEXT:    retq
    647 ;
    648 ; AVX-LABEL: insert_test_sub_sd:
    649 ; AVX:       # BB#0:
    650 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
    651 ; AVX-NEXT:    retq
    652   %1 = fsub <2 x double> %a, %b
    653   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    654   ret <2 x double> %2
    655 }
    656 
    657 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
    658 ; SSE-LABEL: insert_test_mul_sd:
    659 ; SSE:       # BB#0:
    660 ; SSE-NEXT:    mulsd %xmm1, %xmm0
    661 ; SSE-NEXT:    retq
    662 ;
    663 ; AVX-LABEL: insert_test_mul_sd:
    664 ; AVX:       # BB#0:
    665 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
    666 ; AVX-NEXT:    retq
    667   %1 = fmul <2 x double> %a, %b
    668   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    669   ret <2 x double> %2
    670 }
    671 
    672 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
    673 ; SSE-LABEL: insert_test_div_sd:
    674 ; SSE:       # BB#0:
    675 ; SSE-NEXT:    divsd %xmm1, %xmm0
    676 ; SSE-NEXT:    retq
    677 ;
    678 ; AVX-LABEL: insert_test_div_sd:
    679 ; AVX:       # BB#0:
    680 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
    681 ; AVX-NEXT:    retq
    682   %1 = fdiv <2 x double> %a, %b
    683   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
    684   ret <2 x double> %2
    685 }
    686 
    687 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
    688 ; SSE-LABEL: insert_test2_add_ss:
    689 ; SSE:       # BB#0:
    690 ; SSE-NEXT:    addss %xmm0, %xmm1
    691 ; SSE-NEXT:    movaps %xmm1, %xmm0
    692 ; SSE-NEXT:    retq
    693 ;
    694 ; AVX-LABEL: insert_test2_add_ss:
    695 ; AVX:       # BB#0:
    696 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
    697 ; AVX-NEXT:    retq
    698   %1 = fadd <4 x float> %b, %a
    699   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    700   ret <4 x float> %2
    701 }
    702 
    703 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
    704 ; SSE-LABEL: insert_test2_sub_ss:
    705 ; SSE:       # BB#0:
    706 ; SSE-NEXT:    subss %xmm0, %xmm1
    707 ; SSE-NEXT:    movaps %xmm1, %xmm0
    708 ; SSE-NEXT:    retq
    709 ;
    710 ; AVX-LABEL: insert_test2_sub_ss:
    711 ; AVX:       # BB#0:
    712 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
    713 ; AVX-NEXT:    retq
    714   %1 = fsub <4 x float> %b, %a
    715   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    716   ret <4 x float> %2
    717 }
    718 
    719 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
    720 ; SSE-LABEL: insert_test2_mul_ss:
    721 ; SSE:       # BB#0:
    722 ; SSE-NEXT:    mulss %xmm0, %xmm1
    723 ; SSE-NEXT:    movaps %xmm1, %xmm0
    724 ; SSE-NEXT:    retq
    725 ;
    726 ; AVX-LABEL: insert_test2_mul_ss:
    727 ; AVX:       # BB#0:
    728 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
    729 ; AVX-NEXT:    retq
    730   %1 = fmul <4 x float> %b, %a
    731   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    732   ret <4 x float> %2
    733 }
    734 
    735 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
    736 ; SSE-LABEL: insert_test2_div_ss:
    737 ; SSE:       # BB#0:
    738 ; SSE-NEXT:    divss %xmm0, %xmm1
    739 ; SSE-NEXT:    movaps %xmm1, %xmm0
    740 ; SSE-NEXT:    retq
    741 ;
    742 ; AVX-LABEL: insert_test2_div_ss:
    743 ; AVX:       # BB#0:
    744 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
    745 ; AVX-NEXT:    retq
    746   %1 = fdiv <4 x float> %b, %a
    747   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    748   ret <4 x float> %2
    749 }
    750 
    751 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
    752 ; SSE-LABEL: insert_test2_add_sd:
    753 ; SSE:       # BB#0:
    754 ; SSE-NEXT:    addsd %xmm0, %xmm1
    755 ; SSE-NEXT:    movapd %xmm1, %xmm0
    756 ; SSE-NEXT:    retq
    757 ;
    758 ; AVX-LABEL: insert_test2_add_sd:
    759 ; AVX:       # BB#0:
    760 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
    761 ; AVX-NEXT:    retq
    762   %1 = fadd <2 x double> %b, %a
    763   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
    764   ret <2 x double> %2
    765 }
    766 
    767 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
    768 ; SSE-LABEL: insert_test2_sub_sd:
    769 ; SSE:       # BB#0:
    770 ; SSE-NEXT:    subsd %xmm0, %xmm1
    771 ; SSE-NEXT:    movapd %xmm1, %xmm0
    772 ; SSE-NEXT:    retq
    773 ;
    774 ; AVX-LABEL: insert_test2_sub_sd:
    775 ; AVX:       # BB#0:
    776 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
    777 ; AVX-NEXT:    retq
    778   %1 = fsub <2 x double> %b, %a
    779   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
    780   ret <2 x double> %2
    781 }
    782 
    783 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
    784 ; SSE-LABEL: insert_test2_mul_sd:
    785 ; SSE:       # BB#0:
    786 ; SSE-NEXT:    mulsd %xmm0, %xmm1
    787 ; SSE-NEXT:    movapd %xmm1, %xmm0
    788 ; SSE-NEXT:    retq
    789 ;
    790 ; AVX-LABEL: insert_test2_mul_sd:
    791 ; AVX:       # BB#0:
    792 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
    793 ; AVX-NEXT:    retq
    794   %1 = fmul <2 x double> %b, %a
    795   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
    796   ret <2 x double> %2
    797 }
    798 
    799 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
    800 ; SSE-LABEL: insert_test2_div_sd:
    801 ; SSE:       # BB#0:
    802 ; SSE-NEXT:    divsd %xmm0, %xmm1
    803 ; SSE-NEXT:    movapd %xmm1, %xmm0
    804 ; SSE-NEXT:    retq
    805 ;
    806 ; AVX-LABEL: insert_test2_div_sd:
    807 ; AVX:       # BB#0:
    808 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
    809 ; AVX-NEXT:    retq
    810   %1 = fdiv <2 x double> %b, %a
    811   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
    812   ret <2 x double> %2
    813 }
    814 
    815 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
    816 ; SSE-LABEL: insert_test3_add_ss:
    817 ; SSE:       # BB#0:
    818 ; SSE-NEXT:    addss %xmm1, %xmm0
    819 ; SSE-NEXT:    retq
    820 ;
    821 ; AVX-LABEL: insert_test3_add_ss:
    822 ; AVX:       # BB#0:
    823 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
    824 ; AVX-NEXT:    retq
    825   %1 = fadd <4 x float> %a, %b
    826   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
    827   ret <4 x float> %2
    828 }
    829 
    830 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
    831 ; SSE-LABEL: insert_test3_sub_ss:
    832 ; SSE:       # BB#0:
    833 ; SSE-NEXT:    subss %xmm1, %xmm0
    834 ; SSE-NEXT:    retq
    835 ;
    836 ; AVX-LABEL: insert_test3_sub_ss:
    837 ; AVX:       # BB#0:
    838 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
    839 ; AVX-NEXT:    retq
    840   %1 = fsub <4 x float> %a, %b
    841   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
    842   ret <4 x float> %2
    843 }
    844 
    845 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; The select mask <false,true,true,true> takes lane 0 from the fmul result and
; lanes 1-3 from %a, i.e. a scalar mulss with no extra insert/blend emitted.
    846 ; SSE-LABEL: insert_test3_mul_ss:
    847 ; SSE:       # BB#0:
    848 ; SSE-NEXT:    mulss %xmm1, %xmm0
    849 ; SSE-NEXT:    retq
    850 ;
    851 ; AVX-LABEL: insert_test3_mul_ss:
    852 ; AVX:       # BB#0:
    853 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
    854 ; AVX-NEXT:    retq
    855   %1 = fmul <4 x float> %a, %b
    856   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
    857   ret <4 x float> %2
    858 }
    859 
    860 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; The select mask <false,true,true,true> takes lane 0 from the fdiv result and
; lanes 1-3 from %a, i.e. a scalar divss with no extra insert/blend emitted.
    861 ; SSE-LABEL: insert_test3_div_ss:
    862 ; SSE:       # BB#0:
    863 ; SSE-NEXT:    divss %xmm1, %xmm0
    864 ; SSE-NEXT:    retq
    865 ;
    866 ; AVX-LABEL: insert_test3_div_ss:
    867 ; AVX:       # BB#0:
    868 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
    869 ; AVX-NEXT:    retq
    870   %1 = fdiv <4 x float> %a, %b
    871   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
    872   ret <4 x float> %2
    873 }
    874 
    875 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; The select mask <false,true> takes lane 0 from the fadd result and lane 1
; from %a, i.e. a scalar addsd with no extra insert/blend emitted.
    876 ; SSE-LABEL: insert_test3_add_sd:
    877 ; SSE:       # BB#0:
    878 ; SSE-NEXT:    addsd %xmm1, %xmm0
    879 ; SSE-NEXT:    retq
    880 ;
    881 ; AVX-LABEL: insert_test3_add_sd:
    882 ; AVX:       # BB#0:
    883 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    884 ; AVX-NEXT:    retq
    885   %1 = fadd <2 x double> %a, %b
    886   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
    887   ret <2 x double> %2
    888 }
    889 
    890 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; The select mask <false,true> takes lane 0 from the fsub result and lane 1
; from %a, i.e. a scalar subsd with no extra insert/blend emitted.
    891 ; SSE-LABEL: insert_test3_sub_sd:
    892 ; SSE:       # BB#0:
    893 ; SSE-NEXT:    subsd %xmm1, %xmm0
    894 ; SSE-NEXT:    retq
    895 ;
    896 ; AVX-LABEL: insert_test3_sub_sd:
    897 ; AVX:       # BB#0:
    898 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
    899 ; AVX-NEXT:    retq
    900   %1 = fsub <2 x double> %a, %b
    901   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
    902   ret <2 x double> %2
    903 }
    904 
    905 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; The select mask <false,true> takes lane 0 from the fmul result and lane 1
; from %a, i.e. a scalar mulsd with no extra insert/blend emitted.
    906 ; SSE-LABEL: insert_test3_mul_sd:
    907 ; SSE:       # BB#0:
    908 ; SSE-NEXT:    mulsd %xmm1, %xmm0
    909 ; SSE-NEXT:    retq
    910 ;
    911 ; AVX-LABEL: insert_test3_mul_sd:
    912 ; AVX:       # BB#0:
    913 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
    914 ; AVX-NEXT:    retq
    915   %1 = fmul <2 x double> %a, %b
    916   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
    917   ret <2 x double> %2
    918 }
    919 
    920 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; The select mask <false,true> takes lane 0 from the fdiv result and lane 1
; from %a, i.e. a scalar divsd with no extra insert/blend emitted.
    921 ; SSE-LABEL: insert_test3_div_sd:
    922 ; SSE:       # BB#0:
    923 ; SSE-NEXT:    divsd %xmm1, %xmm0
    924 ; SSE-NEXT:    retq
    925 ;
    926 ; AVX-LABEL: insert_test3_div_sd:
    927 ; AVX:       # BB#0:
    928 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
    929 ; AVX-NEXT:    retq
    930   %1 = fdiv <2 x double> %a, %b
    931   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
    932   ret <2 x double> %2
    933 }
    934 
    935 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; Like insert_test3_add_ss but with operands swapped: lane 0 = b[0] + a[0],
; lanes 1-3 from %b; folds to a single addss into %b's register (SSE needs a
; movaps to return the result in xmm0).
    936 ; SSE-LABEL: insert_test4_add_ss:
    937 ; SSE:       # BB#0:
    938 ; SSE-NEXT:    addss %xmm0, %xmm1
    939 ; SSE-NEXT:    movaps %xmm1, %xmm0
    940 ; SSE-NEXT:    retq
    941 ;
    942 ; AVX-LABEL: insert_test4_add_ss:
    943 ; AVX:       # BB#0:
    944 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
    945 ; AVX-NEXT:    retq
    946   %1 = fadd <4 x float> %b, %a
    947   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
    948   ret <4 x float> %2
    949 }
    950 
    951 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; Like insert_test3_sub_ss but with operands swapped: lane 0 = b[0] - a[0],
; lanes 1-3 from %b; folds to a single subss into %b's register (SSE needs a
; movaps to return the result in xmm0).
    952 ; SSE-LABEL: insert_test4_sub_ss:
    953 ; SSE:       # BB#0:
    954 ; SSE-NEXT:    subss %xmm0, %xmm1
    955 ; SSE-NEXT:    movaps %xmm1, %xmm0
    956 ; SSE-NEXT:    retq
    957 ;
    958 ; AVX-LABEL: insert_test4_sub_ss:
    959 ; AVX:       # BB#0:
    960 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
    961 ; AVX-NEXT:    retq
    962   %1 = fsub <4 x float> %b, %a
    963   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
    964   ret <4 x float> %2
    965 }
    966 
    967 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; Like insert_test3_mul_ss but with operands swapped: lane 0 = b[0] * a[0],
; lanes 1-3 from %b; folds to a single mulss into %b's register (SSE needs a
; movaps to return the result in xmm0).
    968 ; SSE-LABEL: insert_test4_mul_ss:
    969 ; SSE:       # BB#0:
    970 ; SSE-NEXT:    mulss %xmm0, %xmm1
    971 ; SSE-NEXT:    movaps %xmm1, %xmm0
    972 ; SSE-NEXT:    retq
    973 ;
    974 ; AVX-LABEL: insert_test4_mul_ss:
    975 ; AVX:       # BB#0:
    976 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
    977 ; AVX-NEXT:    retq
    978   %1 = fmul <4 x float> %b, %a
    979   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
    980   ret <4 x float> %2
    981 }
    982 
    983 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; Like insert_test3_div_ss but with operands swapped: lane 0 = b[0] / a[0],
; lanes 1-3 from %b; folds to a single divss into %b's register (SSE needs a
; movaps to return the result in xmm0).
    984 ; SSE-LABEL: insert_test4_div_ss:
    985 ; SSE:       # BB#0:
    986 ; SSE-NEXT:    divss %xmm0, %xmm1
    987 ; SSE-NEXT:    movaps %xmm1, %xmm0
    988 ; SSE-NEXT:    retq
    989 ;
    990 ; AVX-LABEL: insert_test4_div_ss:
    991 ; AVX:       # BB#0:
    992 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
    993 ; AVX-NEXT:    retq
    994   %1 = fdiv <4 x float> %b, %a
    995   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
    996   ret <4 x float> %2
    997 }
    998 
    999 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; Like insert_test3_add_sd but with operands swapped: lane 0 = b[0] + a[0],
; lane 1 from %b; folds to a single addsd into %b's register (SSE needs a
; movapd to return the result in xmm0).
   1000 ; SSE-LABEL: insert_test4_add_sd:
   1001 ; SSE:       # BB#0:
   1002 ; SSE-NEXT:    addsd %xmm0, %xmm1
   1003 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1004 ; SSE-NEXT:    retq
   1005 ;
   1006 ; AVX-LABEL: insert_test4_add_sd:
   1007 ; AVX:       # BB#0:
   1008 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
   1009 ; AVX-NEXT:    retq
   1010   %1 = fadd <2 x double> %b, %a
   1011   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
   1012   ret <2 x double> %2
   1013 }
   1014 
   1015 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; Like insert_test3_sub_sd but with operands swapped: lane 0 = b[0] - a[0],
; lane 1 from %b; folds to a single subsd into %b's register (SSE needs a
; movapd to return the result in xmm0).
   1016 ; SSE-LABEL: insert_test4_sub_sd:
   1017 ; SSE:       # BB#0:
   1018 ; SSE-NEXT:    subsd %xmm0, %xmm1
   1019 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1020 ; SSE-NEXT:    retq
   1021 ;
   1022 ; AVX-LABEL: insert_test4_sub_sd:
   1023 ; AVX:       # BB#0:
   1024 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
   1025 ; AVX-NEXT:    retq
   1026   %1 = fsub <2 x double> %b, %a
   1027   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
   1028   ret <2 x double> %2
   1029 }
   1030 
   1031 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; Like insert_test3_mul_sd but with operands swapped: lane 0 = b[0] * a[0],
; lane 1 from %b; folds to a single mulsd into %b's register (SSE needs a
; movapd to return the result in xmm0).
   1032 ; SSE-LABEL: insert_test4_mul_sd:
   1033 ; SSE:       # BB#0:
   1034 ; SSE-NEXT:    mulsd %xmm0, %xmm1
   1035 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1036 ; SSE-NEXT:    retq
   1037 ;
   1038 ; AVX-LABEL: insert_test4_mul_sd:
   1039 ; AVX:       # BB#0:
   1040 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
   1041 ; AVX-NEXT:    retq
   1042   %1 = fmul <2 x double> %b, %a
   1043   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
   1044   ret <2 x double> %2
   1045 }
   1046 
   1047 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; Like insert_test3_div_sd but with operands swapped: lane 0 = b[0] / a[0],
; lane 1 from %b; folds to a single divsd into %b's register (SSE needs a
; movapd to return the result in xmm0).
   1048 ; SSE-LABEL: insert_test4_div_sd:
   1049 ; SSE:       # BB#0:
   1050 ; SSE-NEXT:    divsd %xmm0, %xmm1
   1051 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1052 ; SSE-NEXT:    retq
   1053 ;
   1054 ; AVX-LABEL: insert_test4_div_sd:
   1055 ; AVX:       # BB#0:
   1056 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
   1057 ; AVX-NEXT:    retq
   1058   %1 = fdiv <2 x double> %b, %a
   1059   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
   1060   ret <2 x double> %2
   1061 }
   1062