; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; Verify that we don't emit packed vector shift instructions if the
; condition used by the vector select is a vector of constants.
     10 define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
     11 ; SSE2-LABEL: test1:
     12 ; SSE2:       # %bb.0:
     13 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
     14 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
     15 ; SSE2-NEXT:    retq
     16 ;
     17 ; SSE41-LABEL: test1:
     18 ; SSE41:       # %bb.0:
     19 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
     20 ; SSE41-NEXT:    retq
     21 ;
     22 ; AVX-LABEL: test1:
     23 ; AVX:       # %bb.0:
     24 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
     25 ; AVX-NEXT:    retq
     26   %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
     27   ret <4 x float> %1
     28 }
     29 
     30 define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
     31 ; SSE2-LABEL: test2:
     32 ; SSE2:       # %bb.0:
     33 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
     34 ; SSE2-NEXT:    movapd %xmm1, %xmm0
     35 ; SSE2-NEXT:    retq
     36 ;
     37 ; SSE41-LABEL: test2:
     38 ; SSE41:       # %bb.0:
     39 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
     40 ; SSE41-NEXT:    retq
     41 ;
     42 ; AVX-LABEL: test2:
     43 ; AVX:       # %bb.0:
     44 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
     45 ; AVX-NEXT:    retq
     46   %1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
     47   ret <4 x float> %1
     48 }
     49 
     50 define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
     51 ; SSE2-LABEL: test3:
     52 ; SSE2:       # %bb.0:
     53 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
     54 ; SSE2-NEXT:    retq
     55 ;
     56 ; SSE41-LABEL: test3:
     57 ; SSE41:       # %bb.0:
     58 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
     59 ; SSE41-NEXT:    retq
     60 ;
     61 ; AVX-LABEL: test3:
     62 ; AVX:       # %bb.0:
     63 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
     64 ; AVX-NEXT:    retq
     65   %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
     66   ret <4 x float> %1
     67 }
     68 
     69 define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
     70 ; SSE-LABEL: test4:
     71 ; SSE:       # %bb.0:
     72 ; SSE-NEXT:    movaps %xmm1, %xmm0
     73 ; SSE-NEXT:    retq
     74 ;
     75 ; AVX-LABEL: test4:
     76 ; AVX:       # %bb.0:
     77 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
     78 ; AVX-NEXT:    retq
     79   %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
     80   ret <4 x float> %1
     81 }
     82 
     83 define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
     84 ; SSE-LABEL: test5:
     85 ; SSE:       # %bb.0:
     86 ; SSE-NEXT:    retq
     87 ;
     88 ; AVX-LABEL: test5:
     89 ; AVX:       # %bb.0:
     90 ; AVX-NEXT:    retq
     91   %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
     92   ret <4 x float> %1
     93 }
     94 
     95 define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
     96 ; SSE-LABEL: test6:
     97 ; SSE:       # %bb.0:
     98 ; SSE-NEXT:    retq
     99 ;
    100 ; AVX-LABEL: test6:
    101 ; AVX:       # %bb.0:
    102 ; AVX-NEXT:    retq
    103   %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
    104   ret <8 x i16> %1
    105 }
    106 
    107 define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
    108 ; SSE2-LABEL: test7:
    109 ; SSE2:       # %bb.0:
    110 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    111 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    112 ; SSE2-NEXT:    retq
    113 ;
    114 ; SSE41-LABEL: test7:
    115 ; SSE41:       # %bb.0:
    116 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    117 ; SSE41-NEXT:    retq
    118 ;
    119 ; AVX-LABEL: test7:
    120 ; AVX:       # %bb.0:
    121 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    122 ; AVX-NEXT:    retq
    123   %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
    124   ret <8 x i16> %1
    125 }
    126 
    127 define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
    128 ; SSE2-LABEL: test8:
    129 ; SSE2:       # %bb.0:
    130 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    131 ; SSE2-NEXT:    retq
    132 ;
    133 ; SSE41-LABEL: test8:
    134 ; SSE41:       # %bb.0:
    135 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    136 ; SSE41-NEXT:    retq
    137 ;
    138 ; AVX-LABEL: test8:
    139 ; AVX:       # %bb.0:
    140 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    141 ; AVX-NEXT:    retq
    142   %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
    143   ret <8 x i16> %1
    144 }
    145 
    146 define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
    147 ; SSE-LABEL: test9:
    148 ; SSE:       # %bb.0:
    149 ; SSE-NEXT:    movaps %xmm1, %xmm0
    150 ; SSE-NEXT:    retq
    151 ;
    152 ; AVX-LABEL: test9:
    153 ; AVX:       # %bb.0:
    154 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
    155 ; AVX-NEXT:    retq
    156   %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
    157   ret <8 x i16> %1
    158 }
    159 
    160 define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
    161 ; SSE-LABEL: test10:
    162 ; SSE:       # %bb.0:
    163 ; SSE-NEXT:    retq
    164 ;
    165 ; AVX-LABEL: test10:
    166 ; AVX:       # %bb.0:
    167 ; AVX-NEXT:    retq
    168   %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
    169   ret <8 x i16> %1
    170 }
    171 
    172 define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
    173 ; SSE2-LABEL: test11:
    174 ; SSE2:       # %bb.0:
    175 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,0,0,65535,65535,0]
    176 ; SSE2-NEXT:    andps %xmm2, %xmm0
    177 ; SSE2-NEXT:    andnps %xmm1, %xmm2
    178 ; SSE2-NEXT:    orps %xmm2, %xmm0
    179 ; SSE2-NEXT:    retq
    180 ;
    181 ; SSE41-LABEL: test11:
    182 ; SSE41:       # %bb.0:
    183 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
    184 ; SSE41-NEXT:    retq
    185 ;
    186 ; AVX-LABEL: test11:
    187 ; AVX:       # %bb.0:
    188 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
    189 ; AVX-NEXT:    retq
    190   %1 = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
    191   ret <8 x i16> %1
    192 }
    193 
    194 define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
    195 ; SSE-LABEL: test12:
    196 ; SSE:       # %bb.0:
    197 ; SSE-NEXT:    movaps %xmm1, %xmm0
    198 ; SSE-NEXT:    retq
    199 ;
    200 ; AVX-LABEL: test12:
    201 ; AVX:       # %bb.0:
    202 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
    203 ; AVX-NEXT:    retq
    204   %1 = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
    205   ret <8 x i16> %1
    206 }
    207 
    208 define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
    209 ; SSE-LABEL: test13:
    210 ; SSE:       # %bb.0:
    211 ; SSE-NEXT:    movaps %xmm1, %xmm0
    212 ; SSE-NEXT:    retq
    213 ;
    214 ; AVX-LABEL: test13:
    215 ; AVX:       # %bb.0:
    216 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
    217 ; AVX-NEXT:    retq
    218   %1 = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
    219   ret <8 x i16> %1
    220 }
    221 
    222 ; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
    223 define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
    224 ; SSE-LABEL: test14:
    225 ; SSE:       # %bb.0:
    226 ; SSE-NEXT:    retq
    227 ;
    228 ; AVX-LABEL: test14:
    229 ; AVX:       # %bb.0:
    230 ; AVX-NEXT:    retq
    231   %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
    232   ret <4 x float> %1
    233 }
    234 
    235 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
    236 ; SSE-LABEL: test15:
    237 ; SSE:       # %bb.0:
    238 ; SSE-NEXT:    retq
    239 ;
    240 ; AVX-LABEL: test15:
    241 ; AVX:       # %bb.0:
    242 ; AVX-NEXT:    retq
    243   %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
    244   ret <8 x i16> %1
    245 }
    246 
    247 ; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
    248 define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
    249 ; SSE-LABEL: test16:
    250 ; SSE:       # %bb.0:
    251 ; SSE-NEXT:    movaps %xmm1, %xmm0
    252 ; SSE-NEXT:    retq
    253 ;
    254 ; AVX-LABEL: test16:
    255 ; AVX:       # %bb.0:
    256 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
    257 ; AVX-NEXT:    retq
    258   %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
    259   ret <4 x float> %1
    260 }
    261 
    262 define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
    263 ; SSE-LABEL: test17:
    264 ; SSE:       # %bb.0:
    265 ; SSE-NEXT:    movaps %xmm1, %xmm0
    266 ; SSE-NEXT:    retq
    267 ;
    268 ; AVX-LABEL: test17:
    269 ; AVX:       # %bb.0:
    270 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
    271 ; AVX-NEXT:    retq
    272   %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
    273   ret <8 x i16> %1
    274 }
    275 
    276 define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
    277 ; SSE2-LABEL: test18:
    278 ; SSE2:       # %bb.0:
    279 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    280 ; SSE2-NEXT:    retq
    281 ;
    282 ; SSE41-LABEL: test18:
    283 ; SSE41:       # %bb.0:
    284 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    285 ; SSE41-NEXT:    retq
    286 ;
    287 ; AVX-LABEL: test18:
    288 ; AVX:       # %bb.0:
    289 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    290 ; AVX-NEXT:    retq
    291   %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
    292   ret <4 x float> %1
    293 }
    294 
    295 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
    296 ; SSE2-LABEL: test19:
    297 ; SSE2:       # %bb.0:
    298 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    299 ; SSE2-NEXT:    retq
    300 ;
    301 ; SSE41-LABEL: test19:
    302 ; SSE41:       # %bb.0:
    303 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    304 ; SSE41-NEXT:    retq
    305 ;
    306 ; AVX-LABEL: test19:
    307 ; AVX:       # %bb.0:
    308 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    309 ; AVX-NEXT:    retq
    310   %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
    311   ret <4 x i32> %1
    312 }
    313 
    314 define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
    315 ; SSE2-LABEL: test20:
    316 ; SSE2:       # %bb.0:
    317 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    318 ; SSE2-NEXT:    retq
    319 ;
    320 ; SSE41-LABEL: test20:
    321 ; SSE41:       # %bb.0:
    322 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    323 ; SSE41-NEXT:    retq
    324 ;
    325 ; AVX-LABEL: test20:
    326 ; AVX:       # %bb.0:
    327 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    328 ; AVX-NEXT:    retq
    329   %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
    330   ret <2 x double> %1
    331 }
    332 
    333 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
    334 ; SSE2-LABEL: test21:
    335 ; SSE2:       # %bb.0:
    336 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    337 ; SSE2-NEXT:    retq
    338 ;
    339 ; SSE41-LABEL: test21:
    340 ; SSE41:       # %bb.0:
    341 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    342 ; SSE41-NEXT:    retq
    343 ;
    344 ; AVX-LABEL: test21:
    345 ; AVX:       # %bb.0:
    346 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    347 ; AVX-NEXT:    retq
    348   %1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
    349   ret <2 x i64> %1
    350 }
    351 
    352 define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
    353 ; SSE2-LABEL: test22:
    354 ; SSE2:       # %bb.0:
    355 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
    356 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    357 ; SSE2-NEXT:    retq
    358 ;
    359 ; SSE41-LABEL: test22:
    360 ; SSE41:       # %bb.0:
    361 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
    362 ; SSE41-NEXT:    retq
    363 ;
    364 ; AVX-LABEL: test22:
    365 ; AVX:       # %bb.0:
    366 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
    367 ; AVX-NEXT:    retq
    368   %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
    369   ret <4 x float> %1
    370 }
    371 
    372 define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
    373 ; SSE2-LABEL: test23:
    374 ; SSE2:       # %bb.0:
    375 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
    376 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    377 ; SSE2-NEXT:    retq
    378 ;
    379 ; SSE41-LABEL: test23:
    380 ; SSE41:       # %bb.0:
    381 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
    382 ; SSE41-NEXT:    retq
    383 ;
    384 ; AVX-LABEL: test23:
    385 ; AVX:       # %bb.0:
    386 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
    387 ; AVX-NEXT:    retq
    388   %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
    389   ret <4 x i32> %1
    390 }
    391 
    392 define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
    393 ; SSE2-LABEL: test24:
    394 ; SSE2:       # %bb.0:
    395 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    396 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    397 ; SSE2-NEXT:    retq
    398 ;
    399 ; SSE41-LABEL: test24:
    400 ; SSE41:       # %bb.0:
    401 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    402 ; SSE41-NEXT:    retq
    403 ;
    404 ; AVX-LABEL: test24:
    405 ; AVX:       # %bb.0:
    406 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    407 ; AVX-NEXT:    retq
    408   %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
    409   ret <2 x double> %1
    410 }
    411 
    412 define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
    413 ; SSE2-LABEL: test25:
    414 ; SSE2:       # %bb.0:
    415 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    416 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    417 ; SSE2-NEXT:    retq
    418 ;
    419 ; SSE41-LABEL: test25:
    420 ; SSE41:       # %bb.0:
    421 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    422 ; SSE41-NEXT:    retq
    423 ;
    424 ; AVX-LABEL: test25:
    425 ; AVX:       # %bb.0:
    426 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    427 ; AVX-NEXT:    retq
    428   %1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
    429   ret <2 x i64> %1
    430 }
    431 
    432 define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
    433 ; SSE-LABEL: select_of_shuffles_0:
    434 ; SSE:       # %bb.0:
    435 ; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
    436 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
    437 ; SSE-NEXT:    subps %xmm1, %xmm0
    438 ; SSE-NEXT:    retq
    439 ;
    440 ; AVX-LABEL: select_of_shuffles_0:
    441 ; AVX:       # %bb.0:
    442 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
    443 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
    444 ; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
    445 ; AVX-NEXT:    retq
    446   %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    447   %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
    448   %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
    449   %4 = shufflevector <2 x float> %b0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    450   %5 = shufflevector <2 x float> %b1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
    451   %6 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %5, <4 x float> %4
    452   %7 = fsub <4 x float> %3, %6
    453   ret <4 x float> %7
    454 }
    455 
    456 ; PR20677
    457 define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
    458 ; SSE-LABEL: select_illegal:
    459 ; SSE:       # %bb.0:
    460 ; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm4
    461 ; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm5
    462 ; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm6
    463 ; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm7
    464 ; SSE-NEXT:    movaps %xmm7, 112(%rdi)
    465 ; SSE-NEXT:    movaps %xmm6, 96(%rdi)
    466 ; SSE-NEXT:    movaps %xmm5, 80(%rdi)
    467 ; SSE-NEXT:    movaps %xmm4, 64(%rdi)
    468 ; SSE-NEXT:    movaps %xmm3, 48(%rdi)
    469 ; SSE-NEXT:    movaps %xmm2, 32(%rdi)
    470 ; SSE-NEXT:    movaps %xmm1, 16(%rdi)
    471 ; SSE-NEXT:    movaps %xmm0, (%rdi)
    472 ; SSE-NEXT:    movq %rdi, %rax
    473 ; SSE-NEXT:    retq
    474 ;
    475 ; AVX-LABEL: select_illegal:
    476 ; AVX:       # %bb.0:
    477 ; AVX-NEXT:    vmovaps %ymm6, %ymm2
    478 ; AVX-NEXT:    vmovaps %ymm7, %ymm3
    479 ; AVX-NEXT:    retq
    480   %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
    481   ret <16 x double> %sel
    482 }
    483 
    484 ; Make sure we can optimize the condition MSB when it is used by 2 selects.
    485 ; The v2i1 here will be passed as v2i64 and we will emit a sign_extend_inreg to fill the upper bits.
    486 ; We should be able to remove the sra from the sign_extend_inreg to leave only shl.
    487 define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
    488 ; SSE2-LABEL: shrunkblend_2uses:
    489 ; SSE2:       # %bb.0:
    490 ; SSE2-NEXT:    psllq $63, %xmm0
    491 ; SSE2-NEXT:    psrad $31, %xmm0
    492 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
    493 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
    494 ; SSE2-NEXT:    pandn %xmm2, %xmm5
    495 ; SSE2-NEXT:    pand %xmm0, %xmm1
    496 ; SSE2-NEXT:    por %xmm1, %xmm5
    497 ; SSE2-NEXT:    pand %xmm0, %xmm3
    498 ; SSE2-NEXT:    pandn %xmm4, %xmm0
    499 ; SSE2-NEXT:    por %xmm3, %xmm0
    500 ; SSE2-NEXT:    paddq %xmm5, %xmm0
    501 ; SSE2-NEXT:    retq
    502 ;
    503 ; SSE41-LABEL: shrunkblend_2uses:
    504 ; SSE41:       # %bb.0:
    505 ; SSE41-NEXT:    psllq $63, %xmm0
    506 ; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
    507 ; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm4
    508 ; SSE41-NEXT:    paddq %xmm2, %xmm4
    509 ; SSE41-NEXT:    movdqa %xmm4, %xmm0
    510 ; SSE41-NEXT:    retq
    511 ;
    512 ; AVX-LABEL: shrunkblend_2uses:
    513 ; AVX:       # %bb.0:
    514 ; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
    515 ; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
    516 ; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
    517 ; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
    518 ; AVX-NEXT:    retq
    519   %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
    520   %y = select <2 x i1> %cond, <2 x i64> %c, <2 x i64> %d
    521   %z = add <2 x i64> %x, %y
    522   ret <2 x i64> %z
    523 }
    524 
    525 ; Similar to above, but condition has a use that isn't a condition of a vselect so we can't optimize.
    526 define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
    527 ; SSE2-LABEL: shrunkblend_nonvselectuse:
    528 ; SSE2:       # %bb.0:
    529 ; SSE2-NEXT:    psllq $63, %xmm0
    530 ; SSE2-NEXT:    psrad $31, %xmm0
    531 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
    532 ; SSE2-NEXT:    movdqa %xmm3, %xmm0
    533 ; SSE2-NEXT:    pandn %xmm2, %xmm0
    534 ; SSE2-NEXT:    pand %xmm3, %xmm1
    535 ; SSE2-NEXT:    por %xmm1, %xmm0
    536 ; SSE2-NEXT:    paddq %xmm3, %xmm0
    537 ; SSE2-NEXT:    retq
    538 ;
    539 ; SSE41-LABEL: shrunkblend_nonvselectuse:
    540 ; SSE41:       # %bb.0:
    541 ; SSE41-NEXT:    psllq $63, %xmm0
    542 ; SSE41-NEXT:    psrad $31, %xmm0
    543 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
    544 ; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
    545 ; SSE41-NEXT:    paddq %xmm2, %xmm0
    546 ; SSE41-NEXT:    retq
    547 ;
    548 ; AVX-LABEL: shrunkblend_nonvselectuse:
    549 ; AVX:       # %bb.0:
    550 ; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
    551 ; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    552 ; AVX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
    553 ; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
    554 ; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
    555 ; AVX-NEXT:    retq
    556   %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
    557   %y = sext <2 x i1> %cond to <2 x i64>
    558   %z = add <2 x i64> %x, %y
    559   ret <2 x i64> %z
    560 }
    561 
    562