Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-SLOW
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL
     10 
     11 define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute. Every mask index is below 4, so only %a
        ; is read and the result is <a[0], a[0], a[0], a[1]>. The assertions
        ; expect one pshufd on the SSE-level runs and one vpermilps on the
        ; AVX-level runs.
     12 ; SSE-LABEL: shuffle_v4i32_0001:
     13 ; SSE:       # %bb.0:
     14 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
     15 ; SSE-NEXT:    retq
     16 ;
     17 ; AVX-LABEL: shuffle_v4i32_0001:
     18 ; AVX:       # %bb.0:
     19 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
     20 ; AVX-NEXT:    retq
     21   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
     22   ret <4 x i32> %shuffle
     23 }
     24 define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute producing <a[0], a[0], a[2], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one pshufd (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
     25 ; SSE-LABEL: shuffle_v4i32_0020:
     26 ; SSE:       # %bb.0:
     27 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
     28 ; SSE-NEXT:    retq
     29 ;
     30 ; AVX-LABEL: shuffle_v4i32_0020:
     31 ; AVX:       # %bb.0:
     32 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
     33 ; AVX-NEXT:    retq
     34   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
     35   ret <4 x i32> %shuffle
     36 }
     37 define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute producing <a[0], a[1], a[1], a[2]>; no
        ; mask index reaches into %b. Expected lowering is one pshufd (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
     38 ; SSE-LABEL: shuffle_v4i32_0112:
     39 ; SSE:       # %bb.0:
     40 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
     41 ; SSE-NEXT:    retq
     42 ;
     43 ; AVX-LABEL: shuffle_v4i32_0112:
     44 ; AVX:       # %bb.0:
     45 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2]
     46 ; AVX-NEXT:    retq
     47   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
     48   ret <4 x i32> %shuffle
     49 }
     50 define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute producing <a[0], a[3], a[0], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one pshufd (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
     51 ; SSE-LABEL: shuffle_v4i32_0300:
     52 ; SSE:       # %bb.0:
     53 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
     54 ; SSE-NEXT:    retq
     55 ;
     56 ; AVX-LABEL: shuffle_v4i32_0300:
     57 ; AVX:       # %bb.0:
     58 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
     59 ; AVX-NEXT:    retq
     60   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
     61   ret <4 x i32> %shuffle
     62 }
     63 define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute producing <a[1], a[0], a[0], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one pshufd (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
     64 ; SSE-LABEL: shuffle_v4i32_1000:
     65 ; SSE:       # %bb.0:
     66 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
     67 ; SSE-NEXT:    retq
     68 ;
     69 ; AVX-LABEL: shuffle_v4i32_1000:
     70 ; AVX:       # %bb.0:
     71 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
     72 ; AVX-NEXT:    retq
     73   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
     74   ret <4 x i32> %shuffle
     75 }
     76 define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute producing <a[2], a[2], a[0], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one pshufd (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
     77 ; SSE-LABEL: shuffle_v4i32_2200:
     78 ; SSE:       # %bb.0:
     79 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
     80 ; SSE-NEXT:    retq
     81 ;
     82 ; AVX-LABEL: shuffle_v4i32_2200:
     83 ; AVX:       # %bb.0:
     84 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
     85 ; AVX-NEXT:    retq
     86   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
     87   ret <4 x i32> %shuffle
     88 }
     89 define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute producing <a[3], a[3], a[3], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one pshufd (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
     90 ; SSE-LABEL: shuffle_v4i32_3330:
     91 ; SSE:       # %bb.0:
     92 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
     93 ; SSE-NEXT:    retq
     94 ;
     95 ; AVX-LABEL: shuffle_v4i32_3330:
     96 ; AVX:       # %bb.0:
     97 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
     98 ; AVX-NEXT:    retq
     99   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
    100   ret <4 x i32> %shuffle
    101 }
    102 define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
        ; Full lane reversal of %a, producing <a[3], a[2], a[1], a[0]>; %b is
        ; never selected. Expected lowering is one pshufd (SSE-level runs) or
        ; one vpermilps (AVX-level runs).
    103 ; SSE-LABEL: shuffle_v4i32_3210:
    104 ; SSE:       # %bb.0:
    105 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
    106 ; SSE-NEXT:    retq
    107 ;
    108 ; AVX-LABEL: shuffle_v4i32_3210:
    109 ; AVX:       # %bb.0:
    110 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    111 ; AVX-NEXT:    retq
    112   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    113   ret <4 x i32> %shuffle
    114 }
    115 
    116 define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
        ; Single-source integer permute that repeats the pair (a[2], a[1]) in
        ; both halves, producing <a[2], a[1], a[2], a[1]>. Expected lowering is
        ; one pshufd (SSE-level runs) or one vpermilps (AVX-level runs).
    117 ; SSE-LABEL: shuffle_v4i32_2121:
    118 ; SSE:       # %bb.0:
    119 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
    120 ; SSE-NEXT:    retq
    121 ;
    122 ; AVX-LABEL: shuffle_v4i32_2121:
    123 ; AVX:       # %bb.0:
    124 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,1]
    125 ; AVX-NEXT:    retq
    126   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
    127   ret <4 x i32> %shuffle
    128 }
    129 
    130 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
        ; Single-source float permute. Every mask index is below 4, so only %a
        ; is read and the result is <a[0], a[0], a[0], a[1]>. The assertions
        ; expect a one-register shufps on the SSE-level runs and one vpermilps
        ; on the AVX-level runs.
    131 ; SSE-LABEL: shuffle_v4f32_0001:
    132 ; SSE:       # %bb.0:
    133 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
    134 ; SSE-NEXT:    retq
    135 ;
    136 ; AVX-LABEL: shuffle_v4f32_0001:
    137 ; AVX:       # %bb.0:
    138 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
    139 ; AVX-NEXT:    retq
    140   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
    141   ret <4 x float> %shuffle
    142 }
    143 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
        ; Single-source float permute producing <a[0], a[0], a[2], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one shufps (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
    144 ; SSE-LABEL: shuffle_v4f32_0020:
    145 ; SSE:       # %bb.0:
    146 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
    147 ; SSE-NEXT:    retq
    148 ;
    149 ; AVX-LABEL: shuffle_v4f32_0020:
    150 ; AVX:       # %bb.0:
    151 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
    152 ; AVX-NEXT:    retq
    153   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
    154   ret <4 x float> %shuffle
    155 }
    156 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
        ; Single-source float permute producing <a[0], a[3], a[0], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one shufps (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
    157 ; SSE-LABEL: shuffle_v4f32_0300:
    158 ; SSE:       # %bb.0:
    159 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
    160 ; SSE-NEXT:    retq
    161 ;
    162 ; AVX-LABEL: shuffle_v4f32_0300:
    163 ; AVX:       # %bb.0:
    164 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
    165 ; AVX-NEXT:    retq
    166   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
    167   ret <4 x float> %shuffle
    168 }
    169 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
        ; Single-source float permute producing <a[1], a[0], a[0], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one shufps (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
    170 ; SSE-LABEL: shuffle_v4f32_1000:
    171 ; SSE:       # %bb.0:
    172 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
    173 ; SSE-NEXT:    retq
    174 ;
    175 ; AVX-LABEL: shuffle_v4f32_1000:
    176 ; AVX:       # %bb.0:
    177 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
    178 ; AVX-NEXT:    retq
    179   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
    180   ret <4 x float> %shuffle
    181 }
    182 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
        ; Single-source float permute producing <a[2], a[2], a[0], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one shufps (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
    183 ; SSE-LABEL: shuffle_v4f32_2200:
    184 ; SSE:       # %bb.0:
    185 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
    186 ; SSE-NEXT:    retq
    187 ;
    188 ; AVX-LABEL: shuffle_v4f32_2200:
    189 ; AVX:       # %bb.0:
    190 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
    191 ; AVX-NEXT:    retq
    192   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
    193   ret <4 x float> %shuffle
    194 }
    195 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
        ; Single-source float permute producing <a[3], a[3], a[3], a[0]>; no
        ; mask index reaches into %b. Expected lowering is one shufps (SSE-level
        ; runs) or one vpermilps (AVX-level runs).
    196 ; SSE-LABEL: shuffle_v4f32_3330:
    197 ; SSE:       # %bb.0:
    198 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
    199 ; SSE-NEXT:    retq
    200 ;
    201 ; AVX-LABEL: shuffle_v4f32_3330:
    202 ; AVX:       # %bb.0:
    203 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
    204 ; AVX-NEXT:    retq
    205   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
    206   ret <4 x float> %shuffle
    207 }
    208 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
        ; Full lane reversal of %a, producing <a[3], a[2], a[1], a[0]>; %b is
        ; never selected. Expected lowering is one shufps (SSE-level runs) or
        ; one vpermilps (AVX-level runs).
    209 ; SSE-LABEL: shuffle_v4f32_3210:
    210 ; SSE:       # %bb.0:
    211 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    212 ; SSE-NEXT:    retq
    213 ;
    214 ; AVX-LABEL: shuffle_v4f32_3210:
    215 ; AVX:       # %bb.0:
    216 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    217 ; AVX-NEXT:    retq
    218   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    219   ret <4 x float> %shuffle
    220 }
    221 define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
        ; Duplicates each of the two low lanes of %a, producing
        ; <a[0], a[0], a[1], a[1]>. The SSE-level runs expect this to be matched
        ; as unpcklps of the register with itself; the AVX-level runs expect a
        ; vpermilps with the same lane pattern.
    222 ; SSE-LABEL: shuffle_v4f32_0011:
    223 ; SSE:       # %bb.0:
    224 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
    225 ; SSE-NEXT:    retq
    226 ;
    227 ; AVX-LABEL: shuffle_v4f32_0011:
    228 ; AVX:       # %bb.0:
    229 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
    230 ; AVX-NEXT:    retq
    231   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
    232   ret <4 x float> %shuffle
    233 }
    234 define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
        ; Duplicates each of the two high lanes of %a, producing
        ; <a[2], a[2], a[3], a[3]>. The SSE-level runs expect this to be matched
        ; as unpckhps of the register with itself; the AVX-level runs expect a
        ; vpermilps with the same lane pattern.
    235 ; SSE-LABEL: shuffle_v4f32_2233:
    236 ; SSE:       # %bb.0:
    237 ; SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
    238 ; SSE-NEXT:    retq
    239 ;
    240 ; AVX-LABEL: shuffle_v4f32_2233:
    241 ; AVX:       # %bb.0:
    242 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
    243 ; AVX-NEXT:    retq
    244   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
    245   ret <4 x float> %shuffle
    246 }
    247 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
        ; Duplicates the even lanes of %a, producing <a[0], a[0], a[2], a[2]>.
        ; The expected code differs by feature level, so the SSE runs are
        ; checked individually. The baseline SSE2 run expects a shufps, the
        ; SSE3, SSSE3 and SSE4.1 runs expect movsldup, and the AVX-level runs
        ; expect vmovsldup.
    248 ; SSE2-LABEL: shuffle_v4f32_0022:
    249 ; SSE2:       # %bb.0:
    250 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
    251 ; SSE2-NEXT:    retq
    252 ;
    253 ; SSE3-LABEL: shuffle_v4f32_0022:
    254 ; SSE3:       # %bb.0:
    255 ; SSE3-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
    256 ; SSE3-NEXT:    retq
    257 ;
    258 ; SSSE3-LABEL: shuffle_v4f32_0022:
    259 ; SSSE3:       # %bb.0:
    260 ; SSSE3-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
    261 ; SSSE3-NEXT:    retq
    262 ;
    263 ; SSE41-LABEL: shuffle_v4f32_0022:
    264 ; SSE41:       # %bb.0:
    265 ; SSE41-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
    266 ; SSE41-NEXT:    retq
    267 ;
    268 ; AVX-LABEL: shuffle_v4f32_0022:
    269 ; AVX:       # %bb.0:
    270 ; AVX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
    271 ; AVX-NEXT:    retq
    272   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
    273   ret <4 x float> %shuffle
    274 }
    275 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
        ; Duplicates the odd lanes of %a, producing <a[1], a[1], a[3], a[3]>.
        ; The expected code differs by feature level, so the SSE runs are
        ; checked individually. The baseline SSE2 run expects a shufps, the
        ; SSE3, SSSE3 and SSE4.1 runs expect movshdup, and the AVX-level runs
        ; expect vmovshdup.
    276 ; SSE2-LABEL: shuffle_v4f32_1133:
    277 ; SSE2:       # %bb.0:
    278 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
    279 ; SSE2-NEXT:    retq
    280 ;
    281 ; SSE3-LABEL: shuffle_v4f32_1133:
    282 ; SSE3:       # %bb.0:
    283 ; SSE3-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
    284 ; SSE3-NEXT:    retq
    285 ;
    286 ; SSSE3-LABEL: shuffle_v4f32_1133:
    287 ; SSSE3:       # %bb.0:
    288 ; SSSE3-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
    289 ; SSSE3-NEXT:    retq
    290 ;
    291 ; SSE41-LABEL: shuffle_v4f32_1133:
    292 ; SSE41:       # %bb.0:
    293 ; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
    294 ; SSE41-NEXT:    retq
    295 ;
    296 ; AVX-LABEL: shuffle_v4f32_1133:
    297 ; AVX:       # %bb.0:
    298 ; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
    299 ; AVX-NEXT:    retq
    300   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
    301   ret <4 x float> %shuffle
    302 }
    303 
    304 define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
        ; Two-source shuffle. Mask indices 4-7 select from %b, so the result is
        ; <a[0], a[1], b[0], b[1]>, the low halves of both inputs concatenated.
        ; The assertions expect a single movlhps (SSE-level runs) or vmovlhps
        ; (AVX-level runs).
    305 ; SSE-LABEL: shuffle_v4f32_0145:
    306 ; SSE:       # %bb.0:
    307 ; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    308 ; SSE-NEXT:    retq
    309 ;
    310 ; AVX-LABEL: shuffle_v4f32_0145:
    311 ; AVX:       # %bb.0:
    312 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    313 ; AVX-NEXT:    retq
    314   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    315   ret <4 x float> %shuffle
    316 }
    317 
    318 define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) {
        ; Two-source shuffle. Mask indices 4-7 select from %b, so the result is
        ; <b[2], b[3], a[2], a[3]>, the high half of %b followed by the high
        ; half of %a. The assertions expect a single movhlps on the SSE-level
        ; runs and a single vunpckhpd (operating on 64-bit halves) on the
        ; AVX-level runs.
    319 ; SSE-LABEL: shuffle_v4f32_6723:
    320 ; SSE:       # %bb.0:
    321 ; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
    322 ; SSE-NEXT:    retq
    323 ;
    324 ; AVX-LABEL: shuffle_v4f32_6723:
    325 ; AVX:       # %bb.0:
    326 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
    327 ; AVX-NEXT:    retq
    328   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
    329   ret <4 x float> %shuffle
    330 }
    331 
    332 define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
        ; Two-source integer shuffle producing <a[0], a[1], a[2], b[0]> (mask
        ; index 4 selects lane 0 of %b). Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use two shufps. The first builds
        ;    <b0, b0, a2, a0> in xmm1, the second combines it with a[0,1].
        ;  - SSE4.1 run uses pshufd to move b[0] into lane 3, then pblendw.
        ;  - AVX1 run uses vpermilps followed by vblendps.
        ;  - AVX2 and AVX512VL runs use vbroadcastss of b[0] followed by
        ;    vblendps.
    333 ; SSE2-LABEL: shuffle_v4i32_0124:
    334 ; SSE2:       # %bb.0:
    335 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
    336 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
    337 ; SSE2-NEXT:    retq
    338 ;
    339 ; SSE3-LABEL: shuffle_v4i32_0124:
    340 ; SSE3:       # %bb.0:
    341 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
    342 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
    343 ; SSE3-NEXT:    retq
    344 ;
    345 ; SSSE3-LABEL: shuffle_v4i32_0124:
    346 ; SSSE3:       # %bb.0:
    347 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
    348 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
    349 ; SSSE3-NEXT:    retq
    350 ;
    351 ; SSE41-LABEL: shuffle_v4i32_0124:
    352 ; SSE41:       # %bb.0:
    353 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
    354 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
    355 ; SSE41-NEXT:    retq
    356 ;
    357 ; AVX1-LABEL: shuffle_v4i32_0124:
    358 ; AVX1:       # %bb.0:
    359 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,1,2,0]
    360 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
    361 ; AVX1-NEXT:    retq
    362 ;
    363 ; AVX2OR512VL-LABEL: shuffle_v4i32_0124:
    364 ; AVX2OR512VL:       # %bb.0:
    365 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm1, %xmm1
    366 ; AVX2OR512VL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
    367 ; AVX2OR512VL-NEXT:    retq
    368   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
    369   ret <4 x i32> %shuffle
    370 }
    371 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
        ; Two-source integer shuffle producing <a[0], a[1], b[0], a[2]> (mask
        ; index 4 selects lane 0 of %b). Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use two shufps. The first builds
        ;    <b0, b0, a2, a0> in xmm1, the second takes a[0,1] plus lanes 0
        ;    and 2 of that temporary.
        ;  - SSE4.1 run uses one pshufd per input, then pblendw.
        ;  - AVX1 run uses one vpermilps per input, then vblendps.
        ;  - AVX2 and AVX512VL runs use vpbroadcastq on %b, vpshufd on %a,
        ;    then vpblendd.
    372 ; SSE2-LABEL: shuffle_v4i32_0142:
    373 ; SSE2:       # %bb.0:
    374 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
    375 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
    376 ; SSE2-NEXT:    retq
    377 ;
    378 ; SSE3-LABEL: shuffle_v4i32_0142:
    379 ; SSE3:       # %bb.0:
    380 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
    381 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
    382 ; SSE3-NEXT:    retq
    383 ;
    384 ; SSSE3-LABEL: shuffle_v4i32_0142:
    385 ; SSSE3:       # %bb.0:
    386 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
    387 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
    388 ; SSSE3-NEXT:    retq
    389 ;
    390 ; SSE41-LABEL: shuffle_v4i32_0142:
    391 ; SSE41:       # %bb.0:
    392 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
    393 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
    394 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
    395 ; SSE41-NEXT:    retq
    396 ;
    397 ; AVX1-LABEL: shuffle_v4i32_0142:
    398 ; AVX1:       # %bb.0:
    399 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1]
    400 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,2]
    401 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
    402 ; AVX1-NEXT:    retq
    403 ;
    404 ; AVX2OR512VL-LABEL: shuffle_v4i32_0142:
    405 ; AVX2OR512VL:       # %bb.0:
    406 ; AVX2OR512VL-NEXT:    vpbroadcastq %xmm1, %xmm1
    407 ; AVX2OR512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
    408 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
    409 ; AVX2OR512VL-NEXT:    retq
    410   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
    411   ret <4 x i32> %shuffle
    412 }
    413 define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
        ; Two-source integer shuffle producing <a[0], b[0], a[1], a[2]> (mask
        ; index 4 selects lane 0 of %b). Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use two shufps that build the result in
        ;    xmm1, then a movaps to copy it into the return register xmm0.
        ;  - SSE4.1 run uses one pshufd per input, then pblendw.
        ;  - AVX1 run uses one vpermilps per input, then vblendps.
        ;  - AVX2 and AVX512VL runs use vbroadcastss on %b, vpermilps on %a,
        ;    then vblendps.
    414 ; SSE2-LABEL: shuffle_v4i32_0412:
    415 ; SSE2:       # %bb.0:
    416 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
    417 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
    418 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    419 ; SSE2-NEXT:    retq
    420 ;
    421 ; SSE3-LABEL: shuffle_v4i32_0412:
    422 ; SSE3:       # %bb.0:
    423 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
    424 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
    425 ; SSE3-NEXT:    movaps %xmm1, %xmm0
    426 ; SSE3-NEXT:    retq
    427 ;
    428 ; SSSE3-LABEL: shuffle_v4i32_0412:
    429 ; SSSE3:       # %bb.0:
    430 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
    431 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
    432 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
    433 ; SSSE3-NEXT:    retq
    434 ;
    435 ; SSE41-LABEL: shuffle_v4i32_0412:
    436 ; SSE41:       # %bb.0:
    437 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
    438 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
    439 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
    440 ; SSE41-NEXT:    retq
    441 ;
    442 ; AVX1-LABEL: shuffle_v4i32_0412:
    443 ; AVX1:       # %bb.0:
    444 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
    445 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2]
    446 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
    447 ; AVX1-NEXT:    retq
    448 ;
    449 ; AVX2OR512VL-LABEL: shuffle_v4i32_0412:
    450 ; AVX2OR512VL:       # %bb.0:
    451 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm1, %xmm1
    452 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2]
    453 ; AVX2OR512VL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
    454 ; AVX2OR512VL-NEXT:    retq
    455   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
    456   ret <4 x i32> %shuffle
    457 }
    458 define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
        ; Two-source integer shuffle producing <b[0], a[0], a[1], a[2]> (mask
        ; index 4 selects lane 0 of %b). Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use two shufps that build the result in
        ;    xmm1, then a movaps to copy it into the return register xmm0.
        ;  - SSE4.1 run uses pshufd on %a, then pblendw. b[0] is already in
        ;    lane 0 of xmm1, so %b needs no shuffle.
        ;  - All AVX-level runs share one expectation, vpermilps on %a followed
        ;    by vblendps.
    459 ; SSE2-LABEL: shuffle_v4i32_4012:
    460 ; SSE2:       # %bb.0:
    461 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
    462 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
    463 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    464 ; SSE2-NEXT:    retq
    465 ;
    466 ; SSE3-LABEL: shuffle_v4i32_4012:
    467 ; SSE3:       # %bb.0:
    468 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
    469 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
    470 ; SSE3-NEXT:    movaps %xmm1, %xmm0
    471 ; SSE3-NEXT:    retq
    472 ;
    473 ; SSSE3-LABEL: shuffle_v4i32_4012:
    474 ; SSSE3:       # %bb.0:
    475 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
    476 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
    477 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
    478 ; SSSE3-NEXT:    retq
    479 ;
    480 ; SSE41-LABEL: shuffle_v4i32_4012:
    481 ; SSE41:       # %bb.0:
    482 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
    483 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
    484 ; SSE41-NEXT:    retq
    485 ;
    486 ; AVX-LABEL: shuffle_v4i32_4012:
    487 ; AVX:       # %bb.0:
    488 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,2]
    489 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    490 ; AVX-NEXT:    retq
    491   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
    492   ret <4 x i32> %shuffle
    493 }
    494 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
        ; Integer counterpart of the 0145 float test. It concatenates the low
        ; halves of both inputs, producing <a[0], a[1], b[0], b[1]>. Although
        ; the element type is i32, the assertions expect the same single
        ; movlhps (SSE-level runs) or vmovlhps (AVX-level runs).
    495 ; SSE-LABEL: shuffle_v4i32_0145:
    496 ; SSE:       # %bb.0:
    497 ; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    498 ; SSE-NEXT:    retq
    499 ;
    500 ; AVX-LABEL: shuffle_v4i32_0145:
    501 ; AVX:       # %bb.0:
    502 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    503 ; AVX-NEXT:    retq
    504   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    505   ret <4 x i32> %shuffle
    506 }
    507 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
        ; Two-source integer shuffle producing <a[0], b[0], b[1], a[1]> (mask
        ; indices 4 and 5 select lanes 0 and 1 of %b). Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use punpckldq to interleave the low
        ;    halves into <a0, b0, a1, b1>, then pshufd to swap the top two
        ;    lanes.
        ;  - SSE4.1 run uses one pshufd per input, then pblendw.
        ;  - AVX1 run uses one vpermilps per input, then vblendps.
        ;  - AVX2 and AVX512VL runs use vpshufd on %b, vpbroadcastq on %a,
        ;    then vpblendd.
    508 ; SSE2-LABEL: shuffle_v4i32_0451:
    509 ; SSE2:       # %bb.0:
    510 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    511 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    512 ; SSE2-NEXT:    retq
    513 ;
    514 ; SSE3-LABEL: shuffle_v4i32_0451:
    515 ; SSE3:       # %bb.0:
    516 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    517 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    518 ; SSE3-NEXT:    retq
    519 ;
    520 ; SSSE3-LABEL: shuffle_v4i32_0451:
    521 ; SSSE3:       # %bb.0:
    522 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    523 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    524 ; SSSE3-NEXT:    retq
    525 ;
    526 ; SSE41-LABEL: shuffle_v4i32_0451:
    527 ; SSE41:       # %bb.0:
    528 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
    529 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
    530 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
    531 ; SSE41-NEXT:    retq
    532 ;
    533 ; AVX1-LABEL: shuffle_v4i32_0451:
    534 ; AVX1:       # %bb.0:
    535 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
    536 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
    537 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
    538 ; AVX1-NEXT:    retq
    539 ;
    540 ; AVX2OR512VL-LABEL: shuffle_v4i32_0451:
    541 ; AVX2OR512VL:       # %bb.0:
    542 ; AVX2OR512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
    543 ; AVX2OR512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
    544 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
    545 ; AVX2OR512VL-NEXT:    retq
    546   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
    547   ret <4 x i32> %shuffle
    548 }
    549 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
        ; Concatenates the low halves with %b first, producing
        ; <b[0], b[1], a[0], a[1]>. On the SSE-level runs the two-operand
        ; movlhps writes into xmm1, so a movaps copy into the return register
        ; xmm0 is expected as well. On the AVX-level runs the three-operand
        ; vmovlhps writes xmm0 directly and no copy is expected.
    550 ; SSE-LABEL: shuffle_v4i32_4501:
    551 ; SSE:       # %bb.0:
    552 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    553 ; SSE-NEXT:    movaps %xmm1, %xmm0
    554 ; SSE-NEXT:    retq
    555 ;
    556 ; AVX-LABEL: shuffle_v4i32_4501:
    557 ; AVX:       # %bb.0:
    558 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    559 ; AVX-NEXT:    retq
    560   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    561   ret <4 x i32> %shuffle
    562 }
    563 define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
        ; Two-source integer shuffle producing <b[0], a[0], a[1], b[1]> (mask
        ; indices 4 and 5 select lanes 0 and 1 of %b). Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use punpckldq to interleave the low
        ;    halves into <a0, b0, a1, b1>, then pshufd to swap the bottom two
        ;    lanes.
        ;  - SSE4.1 run uses one pshufd per input, then pblendw.
        ;  - AVX1 run uses one vpermilps per input, then vblendps.
        ;  - AVX2 and AVX512VL runs use vpbroadcastq on %b, vpshufd on %a,
        ;    then vpblendd.
    564 ; SSE2-LABEL: shuffle_v4i32_4015:
    565 ; SSE2:       # %bb.0:
    566 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    567 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    568 ; SSE2-NEXT:    retq
    569 ;
    570 ; SSE3-LABEL: shuffle_v4i32_4015:
    571 ; SSE3:       # %bb.0:
    572 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    573 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    574 ; SSE3-NEXT:    retq
    575 ;
    576 ; SSSE3-LABEL: shuffle_v4i32_4015:
    577 ; SSSE3:       # %bb.0:
    578 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    579 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    580 ; SSSE3-NEXT:    retq
    581 ;
    582 ; SSE41-LABEL: shuffle_v4i32_4015:
    583 ; SSE41:       # %bb.0:
    584 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
    585 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
    586 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
    587 ; SSE41-NEXT:    retq
    588 ;
    589 ; AVX1-LABEL: shuffle_v4i32_4015:
    590 ; AVX1:       # %bb.0:
    591 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1]
    592 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
    593 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
    594 ; AVX1-NEXT:    retq
    595 ;
    596 ; AVX2OR512VL-LABEL: shuffle_v4i32_4015:
    597 ; AVX2OR512VL:       # %bb.0:
    598 ; AVX2OR512VL-NEXT:    vpbroadcastq %xmm1, %xmm1
    599 ; AVX2OR512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
    600 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
    601 ; AVX2OR512VL-NEXT:    retq
    602   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
    603   ret <4 x i32> %shuffle
    604 }
    605 
    606 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
        ; Keeps lane 0 of %a and zeroes the other three lanes. The first
        ; shuffle operand is zeroinitializer and %a is the second, so mask
        ; index 4 selects a[0] and indices 1-3 select zeros, producing
        ; <a[0], 0, 0, 0>. Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use xorps to zero xmm1, movss to merge
        ;    a[0] into it, then movaps to copy the result back to xmm0.
        ;  - SSE4.1 run uses xorps followed by blendps.
        ;  - AVX-level runs use vxorps followed by vblendps.
    607 ; SSE2-LABEL: shuffle_v4f32_4zzz:
    608 ; SSE2:       # %bb.0:
    609 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    610 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
    611 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    612 ; SSE2-NEXT:    retq
    613 ;
    614 ; SSE3-LABEL: shuffle_v4f32_4zzz:
    615 ; SSE3:       # %bb.0:
    616 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    617 ; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
    618 ; SSE3-NEXT:    movaps %xmm1, %xmm0
    619 ; SSE3-NEXT:    retq
    620 ;
    621 ; SSSE3-LABEL: shuffle_v4f32_4zzz:
    622 ; SSSE3:       # %bb.0:
    623 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    624 ; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
    625 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
    626 ; SSSE3-NEXT:    retq
    627 ;
    628 ; SSE41-LABEL: shuffle_v4f32_4zzz:
    629 ; SSE41:       # %bb.0:
    630 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    631 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
    632 ; SSE41-NEXT:    retq
    633 ;
    634 ; AVX-LABEL: shuffle_v4f32_4zzz:
    635 ; AVX:       # %bb.0:
    636 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    637 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
    638 ; AVX-NEXT:    retq
    639   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
    640   ret <4 x float> %shuffle
    641 }
    642 
    643 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
        ; Places a[0] in lane 1 and zeroes every other lane. The first shuffle
        ; operand is zeroinitializer, so mask indices 2, 3 and 0 all select
        ; zeros and index 4 selects a[0], producing <0, a[0], 0, 0>.
        ; Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use xorps plus two shufps. The first
        ;    builds <a0, a0, 0, 0>, the second picks lanes 2 and 0 of it plus
        ;    two zeros.
        ;  - SSE4.1 and AVX-level runs use a single insertps / vinsertps with
        ;    the other lanes zeroed.
    644 ; SSE2-LABEL: shuffle_v4f32_z4zz:
    645 ; SSE2:       # %bb.0:
    646 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    647 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
    648 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    649 ; SSE2-NEXT:    retq
    650 ;
    651 ; SSE3-LABEL: shuffle_v4f32_z4zz:
    652 ; SSE3:       # %bb.0:
    653 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    654 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
    655 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    656 ; SSE3-NEXT:    retq
    657 ;
    658 ; SSSE3-LABEL: shuffle_v4f32_z4zz:
    659 ; SSSE3:       # %bb.0:
    660 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    661 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
    662 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    663 ; SSSE3-NEXT:    retq
    664 ;
    665 ; SSE41-LABEL: shuffle_v4f32_z4zz:
    666 ; SSE41:       # %bb.0:
    667 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
    668 ; SSE41-NEXT:    retq
    669 ;
    670 ; AVX-LABEL: shuffle_v4f32_z4zz:
    671 ; AVX:       # %bb.0:
    672 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
    673 ; AVX-NEXT:    retq
    674   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
    675   ret <4 x float> %shuffle
    676 }
    677 
    678 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
        ; Places a[0] in lane 2 and zeroes every other lane. The first shuffle
        ; operand is zeroinitializer, so mask index 0 selects a zero and index
        ; 4 selects a[0], producing <0, 0, a[0], 0>. Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use xorps plus two shufps that build
        ;    the result in xmm1, then movaps to copy it into xmm0.
        ;  - SSE4.1 and AVX-level runs use a single insertps / vinsertps with
        ;    the other lanes zeroed.
    679 ; SSE2-LABEL: shuffle_v4f32_zz4z:
    680 ; SSE2:       # %bb.0:
    681 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    682 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
    683 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
    684 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    685 ; SSE2-NEXT:    retq
    686 ;
    687 ; SSE3-LABEL: shuffle_v4f32_zz4z:
    688 ; SSE3:       # %bb.0:
    689 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    690 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
    691 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
    692 ; SSE3-NEXT:    movaps %xmm1, %xmm0
    693 ; SSE3-NEXT:    retq
    694 ;
    695 ; SSSE3-LABEL: shuffle_v4f32_zz4z:
    696 ; SSSE3:       # %bb.0:
    697 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    698 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
    699 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
    700 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
    701 ; SSSE3-NEXT:    retq
    702 ;
    703 ; SSE41-LABEL: shuffle_v4f32_zz4z:
    704 ; SSE41:       # %bb.0:
    705 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
    706 ; SSE41-NEXT:    retq
    707 ;
    708 ; AVX-LABEL: shuffle_v4f32_zz4z:
    709 ; AVX:       # %bb.0:
    710 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
    711 ; AVX-NEXT:    retq
    712   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
    713   ret <4 x float> %shuffle
    714 }
    715 
    716 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
        ; Lane 0 must be zero and lane 3 must be a[0]. Lanes 1 and 2 are undef
        ; in the mask, so any value is acceptable there. The first shuffle
        ; operand is zeroinitializer, so index 0 selects a zero and index 4
        ; selects a[0]. Expected code per run:
        ;  - SSE2, SSE3 and SSSE3 runs use xorps plus a single shufps into
        ;    xmm1, then movaps to copy the result into xmm0.
        ;  - SSE4.1 and AVX-level runs use a single insertps / vinsertps. The
        ;    asserted form zeroes the two undef lanes as well.
    717 ; SSE2-LABEL: shuffle_v4f32_zuu4:
    718 ; SSE2:       # %bb.0:
    719 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    720 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    721 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    722 ; SSE2-NEXT:    retq
    723 ;
    724 ; SSE3-LABEL: shuffle_v4f32_zuu4:
    725 ; SSE3:       # %bb.0:
    726 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    727 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    728 ; SSE3-NEXT:    movaps %xmm1, %xmm0
    729 ; SSE3-NEXT:    retq
    730 ;
    731 ; SSSE3-LABEL: shuffle_v4f32_zuu4:
    732 ; SSSE3:       # %bb.0:
    733 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    734 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    735 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
    736 ; SSSE3-NEXT:    retq
    737 ;
    738 ; SSE41-LABEL: shuffle_v4f32_zuu4:
    739 ; SSE41:       # %bb.0:
    740 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
    741 ; SSE41-NEXT:    retq
    742 ;
    743 ; AVX-LABEL: shuffle_v4f32_zuu4:
    744 ; AVX:       # %bb.0:
    745 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
    746 ; AVX-NEXT:    retq
    747   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
    748   ret <4 x float> %shuffle
    749 }
    750 
    751 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
        ; Shuffles zeroinitializer with %a using mask <0,1,2,7>, so the result is
        ; <0, 0, 0, a[3]> (a[3] stays in its own lane). The SSE2, SSE3 and SSSE3
        ; runs expect xorps, two shufps and a movaps; the SSE4.1 and AVX runs
        ; expect a zeroed register blended with %a via (v)blendps.
    752 ; SSE2-LABEL: shuffle_v4f32_zzz7:
    753 ; SSE2:       # %bb.0:
    754 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    755 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
    756 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    757 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    758 ; SSE2-NEXT:    retq
    759 ;
    760 ; SSE3-LABEL: shuffle_v4f32_zzz7:
    761 ; SSE3:       # %bb.0:
    762 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    763 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
    764 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    765 ; SSE3-NEXT:    movaps %xmm1, %xmm0
    766 ; SSE3-NEXT:    retq
    767 ;
    768 ; SSSE3-LABEL: shuffle_v4f32_zzz7:
    769 ; SSSE3:       # %bb.0:
    770 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    771 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
    772 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    773 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
    774 ; SSSE3-NEXT:    retq
    775 ;
    776 ; SSE41-LABEL: shuffle_v4f32_zzz7:
    777 ; SSE41:       # %bb.0:
    778 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    779 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
    780 ; SSE41-NEXT:    retq
    781 ;
    782 ; AVX-LABEL: shuffle_v4f32_zzz7:
    783 ; AVX:       # %bb.0:
    784 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    785 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
    786 ; AVX-NEXT:    retq
    787   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
    788   ret <4 x float> %shuffle
    789 }
    790 
    791 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
        ; Shuffles zeroinitializer with %a using mask <0,6,2,3>, so the result is
        ; <0, a[2], 0, 0>. The SSE2, SSE3 and SSSE3 runs expect xorps followed by
        ; two shufps that leave the result in xmm0; the SSE4.1 and AVX runs expect
        ; a single (v)insertps moving a[2] into lane 1 and zeroing the rest.
    792 ; SSE2-LABEL: shuffle_v4f32_z6zz:
    793 ; SSE2:       # %bb.0:
    794 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    795 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
    796 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    797 ; SSE2-NEXT:    retq
    798 ;
    799 ; SSE3-LABEL: shuffle_v4f32_z6zz:
    800 ; SSE3:       # %bb.0:
    801 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    802 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
    803 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    804 ; SSE3-NEXT:    retq
    805 ;
    806 ; SSSE3-LABEL: shuffle_v4f32_z6zz:
    807 ; SSSE3:       # %bb.0:
    808 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    809 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
    810 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    811 ; SSSE3-NEXT:    retq
    812 ;
    813 ; SSE41-LABEL: shuffle_v4f32_z6zz:
    814 ; SSE41:       # %bb.0:
    815 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
    816 ; SSE41-NEXT:    retq
    817 ;
    818 ; AVX-LABEL: shuffle_v4f32_z6zz:
    819 ; AVX:       # %bb.0:
    820 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
    821 ; AVX-NEXT:    retq
    822   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
    823   ret <4 x float> %shuffle
    824 }
    825 
    826 define <4 x float> @shuffle_v4f32_0z23(<4 x float> %a) {
        ; Shuffles %a (first operand) with zeroinitializer using mask <0,4,2,3>,
        ; so the result is <a[0], 0, a[2], a[3]>, i.e. only lane 1 is cleared.
        ; The SSE2, SSE3 and SSSE3 runs expect xorps, two shufps and a movaps;
        ; the SSE4.1 and AVX runs expect a zeroed register plus one (v)blendps.
    827 ; SSE2-LABEL: shuffle_v4f32_0z23:
    828 ; SSE2:       # %bb.0:
    829 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    830 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
    831 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
    832 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    833 ; SSE2-NEXT:    retq
    834 ;
    835 ; SSE3-LABEL: shuffle_v4f32_0z23:
    836 ; SSE3:       # %bb.0:
    837 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    838 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
    839 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
    840 ; SSE3-NEXT:    movaps %xmm1, %xmm0
    841 ; SSE3-NEXT:    retq
    842 ;
    843 ; SSSE3-LABEL: shuffle_v4f32_0z23:
    844 ; SSSE3:       # %bb.0:
    845 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    846 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
    847 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
    848 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
    849 ; SSSE3-NEXT:    retq
    850 ;
    851 ; SSE41-LABEL: shuffle_v4f32_0z23:
    852 ; SSE41:       # %bb.0:
    853 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    854 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
    855 ; SSE41-NEXT:    retq
    856 ;
    857 ; AVX-LABEL: shuffle_v4f32_0z23:
    858 ; AVX:       # %bb.0:
    859 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    860 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
    861 ; AVX-NEXT:    retq
    862   %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
    863   ret <4 x float> %shuffle
    864 }
    865 
    866 define <4 x float> @shuffle_v4f32_01z3(<4 x float> %a) {
        ; Shuffles %a with zeroinitializer using mask <0,1,4,3>, so the result is
        ; <a[0], a[1], 0, a[3]>, i.e. only lane 2 is cleared. The SSE2, SSE3 and
        ; SSSE3 runs expect xorps followed by two shufps ending in xmm0; the
        ; SSE4.1 and AVX runs expect a zeroed register plus one (v)blendps.
    867 ; SSE2-LABEL: shuffle_v4f32_01z3:
    868 ; SSE2:       # %bb.0:
    869 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    870 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
    871 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
    872 ; SSE2-NEXT:    retq
    873 ;
    874 ; SSE3-LABEL: shuffle_v4f32_01z3:
    875 ; SSE3:       # %bb.0:
    876 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    877 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
    878 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
    879 ; SSE3-NEXT:    retq
    880 ;
    881 ; SSSE3-LABEL: shuffle_v4f32_01z3:
    882 ; SSSE3:       # %bb.0:
    883 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    884 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
    885 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
    886 ; SSSE3-NEXT:    retq
    887 ;
    888 ; SSE41-LABEL: shuffle_v4f32_01z3:
    889 ; SSE41:       # %bb.0:
    890 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    891 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
    892 ; SSE41-NEXT:    retq
    893 ;
    894 ; AVX-LABEL: shuffle_v4f32_01z3:
    895 ; AVX:       # %bb.0:
    896 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    897 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
    898 ; AVX-NEXT:    retq
    899   %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
    900   ret <4 x float> %shuffle
    901 }
    902 
    903 define <4 x float> @shuffle_v4f32_012z(<4 x float> %a) {
        ; Shuffles %a with zeroinitializer using mask <0,1,2,7>, so the result is
        ; <a[0], a[1], a[2], 0>, i.e. only lane 3 is cleared. The SSE2, SSE3 and
        ; SSSE3 runs expect xorps followed by two shufps ending in xmm0; the
        ; SSE4.1 and AVX runs expect a zeroed register plus one (v)blendps.
    904 ; SSE2-LABEL: shuffle_v4f32_012z:
    905 ; SSE2:       # %bb.0:
    906 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    907 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
    908 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
    909 ; SSE2-NEXT:    retq
    910 ;
    911 ; SSE3-LABEL: shuffle_v4f32_012z:
    912 ; SSE3:       # %bb.0:
    913 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    914 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
    915 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
    916 ; SSE3-NEXT:    retq
    917 ;
    918 ; SSSE3-LABEL: shuffle_v4f32_012z:
    919 ; SSSE3:       # %bb.0:
    920 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    921 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
    922 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
    923 ; SSSE3-NEXT:    retq
    924 ;
    925 ; SSE41-LABEL: shuffle_v4f32_012z:
    926 ; SSE41:       # %bb.0:
    927 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    928 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
    929 ; SSE41-NEXT:    retq
    930 ;
    931 ; AVX-LABEL: shuffle_v4f32_012z:
    932 ; AVX:       # %bb.0:
    933 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    934 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
    935 ; AVX-NEXT:    retq
    936   %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
    937   ret <4 x float> %shuffle
    938 }
    939 
    940 define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) {
        ; Shuffles %a with zeroinitializer using mask <0,4,4,3>, so the result is
        ; <a[0], 0, 0, a[3]>, i.e. the two middle lanes are cleared. The SSE2,
        ; SSE3 and SSSE3 runs expect xorps, a two-input shufps and a single-input
        ; shufps permute; the SSE4.1 and AVX runs expect a zeroed register plus
        ; one (v)blendps.
    941 ; SSE2-LABEL: shuffle_v4f32_0zz3:
    942 ; SSE2:       # %bb.0:
    943 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    944 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
    945 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
    946 ; SSE2-NEXT:    retq
    947 ;
    948 ; SSE3-LABEL: shuffle_v4f32_0zz3:
    949 ; SSE3:       # %bb.0:
    950 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    951 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
    952 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
    953 ; SSE3-NEXT:    retq
    954 ;
    955 ; SSSE3-LABEL: shuffle_v4f32_0zz3:
    956 ; SSSE3:       # %bb.0:
    957 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    958 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
    959 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
    960 ; SSSE3-NEXT:    retq
    961 ;
    962 ; SSE41-LABEL: shuffle_v4f32_0zz3:
    963 ; SSE41:       # %bb.0:
    964 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    965 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
    966 ; SSE41-NEXT:    retq
    967 ;
    968 ; AVX-LABEL: shuffle_v4f32_0zz3:
    969 ; AVX:       # %bb.0:
    970 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    971 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
    972 ; AVX-NEXT:    retq
    973   %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
    974   ret <4 x float> %shuffle
    975 }
    976 
    977 define <4 x float> @shuffle_v4f32_0z2z(<4 x float> %v) {
        ; Shuffles %v with the constant <0.0, undef, undef, undef> using mask
        ; <0,4,2,4>. Only lane 0 of the constant (the 0.0) is ever selected, so the
        ; result is <v[0], 0, v[2], 0> even though the second operand is mostly
        ; undef. The SSE2, SSE3 and SSSE3 runs expect xorps plus two shufps; the
        ; SSE4.1 and AVX runs expect a zeroed register plus one (v)blendps.
    978 ; SSE2-LABEL: shuffle_v4f32_0z2z:
    979 ; SSE2:       # %bb.0:
    980 ; SSE2-NEXT:    xorps %xmm1, %xmm1
    981 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
    982 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
    983 ; SSE2-NEXT:    retq
    984 ;
    985 ; SSE3-LABEL: shuffle_v4f32_0z2z:
    986 ; SSE3:       # %bb.0:
    987 ; SSE3-NEXT:    xorps %xmm1, %xmm1
    988 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
    989 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
    990 ; SSE3-NEXT:    retq
    991 ;
    992 ; SSSE3-LABEL: shuffle_v4f32_0z2z:
    993 ; SSSE3:       # %bb.0:
    994 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
    995 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
    996 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
    997 ; SSSE3-NEXT:    retq
    998 ;
    999 ; SSE41-LABEL: shuffle_v4f32_0z2z:
   1000 ; SSE41:       # %bb.0:
   1001 ; SSE41-NEXT:    xorps %xmm1, %xmm1
   1002 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
   1003 ; SSE41-NEXT:    retq
   1004 ;
   1005 ; AVX-LABEL: shuffle_v4f32_0z2z:
   1006 ; AVX:       # %bb.0:
   1007 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1008 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
   1009 ; AVX-NEXT:    retq
   1010   %shuffle = shufflevector <4 x float> %v, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
   1011   ret <4 x float> %shuffle
   1012 }
   1013 
   1014 define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) {
        ; Shuffles %a with %b using mask <undef,0,5,1>, so the result is
        ; <undef, a[0], b[1], a[1]>. Because lane 0 is undef, the expected lowering
        ; is an interleave of the low halves with %b first, giving
        ; <b[0], a[0], b[1], a[1]>. Every SSE run shares one expectation
        ; (unpcklps plus a movaps); the AVX runs expect a single vunpcklps.
   1015 ; SSE-LABEL: shuffle_v4f32_u051:
   1016 ; SSE:       # %bb.0:
   1017 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
   1018 ; SSE-NEXT:    movaps %xmm1, %xmm0
   1019 ; SSE-NEXT:    retq
   1020 ;
   1021 ; AVX-LABEL: shuffle_v4f32_u051:
   1022 ; AVX:       # %bb.0:
   1023 ; AVX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
   1024 ; AVX-NEXT:    retq
   1025   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 undef, i32 0, i32 5, i32 1>
   1026   ret <4 x float> %shuffle
   1027 }
   1028 
   1029 define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %a, <4 x float> %b) {
        ; Two chained shuffles. The first builds <undef, 0, 0, b[0]> from %b and
        ; zeroinitializer (mask <undef,5,6,0>); the second takes lane 0 from %a and
        ; lanes 1-3 from that intermediate (mask <0,5,6,7>), so the final result
        ; is <a[0], 0, 0, b[0]>. The SSE2, SSE3 and SSSE3 runs expect xorps, two
        ; shufps, a movss and a movaps; the SSE4.1 and AVX runs expect the pair
        ; to be combined into a single (v)insertps.
   1030 ; SSE2-LABEL: shuffle_v4f32_0zz4:
   1031 ; SSE2:       # %bb.0:
   1032 ; SSE2-NEXT:    xorps %xmm2, %xmm2
   1033 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
   1034 ; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
   1035 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
   1036 ; SSE2-NEXT:    movaps %xmm2, %xmm0
   1037 ; SSE2-NEXT:    retq
   1038 ;
   1039 ; SSE3-LABEL: shuffle_v4f32_0zz4:
   1040 ; SSE3:       # %bb.0:
   1041 ; SSE3-NEXT:    xorps %xmm2, %xmm2
   1042 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
   1043 ; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
   1044 ; SSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
   1045 ; SSE3-NEXT:    movaps %xmm2, %xmm0
   1046 ; SSE3-NEXT:    retq
   1047 ;
   1048 ; SSSE3-LABEL: shuffle_v4f32_0zz4:
   1049 ; SSSE3:       # %bb.0:
   1050 ; SSSE3-NEXT:    xorps %xmm2, %xmm2
   1051 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0]
   1052 ; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
   1053 ; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
   1054 ; SSSE3-NEXT:    movaps %xmm2, %xmm0
   1055 ; SSSE3-NEXT:    retq
   1056 ;
   1057 ; SSE41-LABEL: shuffle_v4f32_0zz4:
   1058 ; SSE41:       # %bb.0:
   1059 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
   1060 ; SSE41-NEXT:    retq
   1061 ;
   1062 ; AVX-LABEL: shuffle_v4f32_0zz4:
   1063 ; AVX:       # %bb.0:
   1064 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
   1065 ; AVX-NEXT:    retq
   1066   %shuffle = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <4 x i32> <i32 undef, i32 5, i32 6, i32 0>
   1067   %shuffle1 = shufflevector <4 x float> %a, <4 x float> %shuffle, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   1068   ret <4 x float> %shuffle1
   1069 }
   1070 
   1071 define <4 x float> @shuffle_v4f32_0zz6(<4 x float> %a, <4 x float> %b) {
        ; Two chained shuffles. The first builds <a[0], undef, undef, b[2]> from
        ; %a and %b (mask <0,undef,undef,6>); the second keeps lanes 0 and 3 of
        ; that intermediate and takes lanes 1-2 from zeroinitializer (mask
        ; <4,1,2,7>), so the final result is <a[0], 0, 0, b[2]>. The SSE2, SSE3
        ; and SSSE3 runs expect three shufps around an xorps plus a movaps; the
        ; SSE4.1 and AVX runs expect a single (v)insertps.
   1072 ; SSE2-LABEL: shuffle_v4f32_0zz6:
   1073 ; SSE2:       # %bb.0:
   1074 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
   1075 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   1076 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
   1077 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
   1078 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1079 ; SSE2-NEXT:    retq
   1080 ;
   1081 ; SSE3-LABEL: shuffle_v4f32_0zz6:
   1082 ; SSE3:       # %bb.0:
   1083 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
   1084 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   1085 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
   1086 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
   1087 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1088 ; SSE3-NEXT:    retq
   1089 ;
   1090 ; SSSE3-LABEL: shuffle_v4f32_0zz6:
   1091 ; SSSE3:       # %bb.0:
   1092 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
   1093 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   1094 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3]
   1095 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0,1,3]
   1096 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   1097 ; SSSE3-NEXT:    retq
   1098 ;
   1099 ; SSE41-LABEL: shuffle_v4f32_0zz6:
   1100 ; SSE41:       # %bb.0:
   1101 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
   1102 ; SSE41-NEXT:    retq
   1103 ;
   1104 ; AVX-LABEL: shuffle_v4f32_0zz6:
   1105 ; AVX:       # %bb.0:
   1106 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2]
   1107 ; AVX-NEXT:    retq
   1108   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 6>
   1109   %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
   1110   ret <4 x float> %shuffle1
   1111 }
   1112 
   1113 define <4 x float> @shuffle_v4f32_0z24(<4 x float> %a, <4 x float> %b) {
        ; Two chained shuffles. The first builds <a[0], undef, a[2], b[0]> from %a
        ; and %b (mask <0,undef,2,4>); the second keeps lanes 0, 2 and 3 of that
        ; intermediate and takes lane 1 from zeroinitializer (mask <4,1,6,7>), so
        ; the final result is <a[0], 0, a[2], b[0]>. The SSE2, SSE3 and SSSE3 runs
        ; expect four shufps around an xorps plus a movaps; the SSE4.1 and AVX
        ; runs expect a single (v)insertps that inserts b[0] and zeroes lane 1.
   1114 ; SSE2-LABEL: shuffle_v4f32_0z24:
   1115 ; SSE2:       # %bb.0:
   1116 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
   1117 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
   1118 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   1119 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
   1120 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
   1121 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1122 ; SSE2-NEXT:    retq
   1123 ;
   1124 ; SSE3-LABEL: shuffle_v4f32_0z24:
   1125 ; SSE3:       # %bb.0:
   1126 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
   1127 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
   1128 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   1129 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
   1130 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
   1131 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1132 ; SSE3-NEXT:    retq
   1133 ;
   1134 ; SSSE3-LABEL: shuffle_v4f32_0z24:
   1135 ; SSSE3:       # %bb.0:
   1136 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
   1137 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
   1138 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   1139 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
   1140 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
   1141 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   1142 ; SSSE3-NEXT:    retq
   1143 ;
   1144 ; SSE41-LABEL: shuffle_v4f32_0z24:
   1145 ; SSE41:       # %bb.0:
   1146 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
   1147 ; SSE41-NEXT:    retq
   1148 ;
   1149 ; AVX-LABEL: shuffle_v4f32_0z24:
   1150 ; AVX:       # %bb.0:
   1151 ; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
   1152 ; AVX-NEXT:    retq
   1153   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 4>
   1154   %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
   1155   ret <4 x float> %shuffle1
   1156 }
   1157 
   1158 define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
        ; Integer variant. Shuffles zeroinitializer with %a using mask <4,1,2,3>,
        ; so the result is <a[0], 0, 0, 0> (keep the low element, clear the rest).
        ; The SSE2, SSE3 and SSSE3 runs expect xorps, movss and a movaps; the
        ; SSE4.1 and AVX runs expect a zeroed register plus one (v)blendps.
   1159 ; SSE2-LABEL: shuffle_v4i32_4zzz:
   1160 ; SSE2:       # %bb.0:
   1161 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   1162 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1163 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1164 ; SSE2-NEXT:    retq
   1165 ;
   1166 ; SSE3-LABEL: shuffle_v4i32_4zzz:
   1167 ; SSE3:       # %bb.0:
   1168 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   1169 ; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1170 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1171 ; SSE3-NEXT:    retq
   1172 ;
   1173 ; SSSE3-LABEL: shuffle_v4i32_4zzz:
   1174 ; SSSE3:       # %bb.0:
   1175 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   1176 ; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1177 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   1178 ; SSSE3-NEXT:    retq
   1179 ;
   1180 ; SSE41-LABEL: shuffle_v4i32_4zzz:
   1181 ; SSE41:       # %bb.0:
   1182 ; SSE41-NEXT:    xorps %xmm1, %xmm1
   1183 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   1184 ; SSE41-NEXT:    retq
   1185 ;
   1186 ; AVX-LABEL: shuffle_v4i32_4zzz:
   1187 ; AVX:       # %bb.0:
   1188 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1189 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   1190 ; AVX-NEXT:    retq
   1191   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
   1192   ret <4 x i32> %shuffle
   1193 }
   1194 
   1195 define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
        ; Integer variant. Shuffles zeroinitializer with %a using mask <2,4,3,0>,
        ; so the result is <0, a[0], 0, 0>. Expected lowerings differ per run
        ;  - SSE2, SSE3, SSSE3 expect xorps, movss, then a pshufd permute
        ;  - SSE4.1 expects pxor, pblendw, then a pshufd permute
        ;  - AVX1 and AVX2 without fast-variable-shuffle expect vxorps, vblendps,
        ;    then vpermilps
        ;  - the AVX2 and AVX512VL runs with fast-variable-shuffle expect a single
        ;    vpshufb that both moves the element and zeroes the other bytes
   1196 ; SSE2-LABEL: shuffle_v4i32_z4zz:
   1197 ; SSE2:       # %bb.0:
   1198 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   1199 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1200 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
   1201 ; SSE2-NEXT:    retq
   1202 ;
   1203 ; SSE3-LABEL: shuffle_v4i32_z4zz:
   1204 ; SSE3:       # %bb.0:
   1205 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   1206 ; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1207 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
   1208 ; SSE3-NEXT:    retq
   1209 ;
   1210 ; SSSE3-LABEL: shuffle_v4i32_z4zz:
   1211 ; SSSE3:       # %bb.0:
   1212 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   1213 ; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1214 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
   1215 ; SSSE3-NEXT:    retq
   1216 ;
   1217 ; SSE41-LABEL: shuffle_v4i32_z4zz:
   1218 ; SSE41:       # %bb.0:
   1219 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   1220 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
   1221 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
   1222 ; SSE41-NEXT:    retq
   1223 ;
   1224 ; AVX1-LABEL: shuffle_v4i32_z4zz:
   1225 ; AVX1:       # %bb.0:
   1226 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1227 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   1228 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1]
   1229 ; AVX1-NEXT:    retq
   1230 ;
   1231 ; AVX2-SLOW-LABEL: shuffle_v4i32_z4zz:
   1232 ; AVX2-SLOW:       # %bb.0:
   1233 ; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1234 ; AVX2-SLOW-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   1235 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1]
   1236 ; AVX2-SLOW-NEXT:    retq
   1237 ;
   1238 ; AVX2-FAST-LABEL: shuffle_v4i32_z4zz:
   1239 ; AVX2-FAST:       # %bb.0:
   1240 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
   1241 ; AVX2-FAST-NEXT:    retq
   1242 ;
   1243 ; AVX512VL-LABEL: shuffle_v4i32_z4zz:
   1244 ; AVX512VL:       # %bb.0:
   1245 ; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
   1246 ; AVX512VL-NEXT:    retq
   1247   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
   1248   ret <4 x i32> %shuffle
   1249 }
   1250 
   1251 define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
        ; Integer variant. Shuffles zeroinitializer with %a using mask <0,0,4,0>,
        ; so the result is <0, 0, a[0], 0>. Expected lowerings differ per run
        ;  - SSE2, SSE3, SSSE3 expect xorps, movss, then a pshufd permute
        ;  - SSE4.1 expects pxor, pblendw, then a pshufd permute
        ;  - AVX1 and AVX2 without fast-variable-shuffle expect vxorps, vblendps,
        ;    then vpermilps
        ;  - the AVX2 and AVX512VL runs with fast-variable-shuffle expect a single
        ;    vpshufb that both moves the element and zeroes the other bytes
   1252 ; SSE2-LABEL: shuffle_v4i32_zz4z:
   1253 ; SSE2:       # %bb.0:
   1254 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   1255 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1256 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
   1257 ; SSE2-NEXT:    retq
   1258 ;
   1259 ; SSE3-LABEL: shuffle_v4i32_zz4z:
   1260 ; SSE3:       # %bb.0:
   1261 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   1262 ; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1263 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
   1264 ; SSE3-NEXT:    retq
   1265 ;
   1266 ; SSSE3-LABEL: shuffle_v4i32_zz4z:
   1267 ; SSSE3:       # %bb.0:
   1268 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   1269 ; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   1270 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
   1271 ; SSSE3-NEXT:    retq
   1272 ;
   1273 ; SSE41-LABEL: shuffle_v4i32_zz4z:
   1274 ; SSE41:       # %bb.0:
   1275 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   1276 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
   1277 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
   1278 ; SSE41-NEXT:    retq
   1279 ;
   1280 ; AVX1-LABEL: shuffle_v4i32_zz4z:
   1281 ; AVX1:       # %bb.0:
   1282 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1283 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   1284 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1]
   1285 ; AVX1-NEXT:    retq
   1286 ;
   1287 ; AVX2-SLOW-LABEL: shuffle_v4i32_zz4z:
   1288 ; AVX2-SLOW:       # %bb.0:
   1289 ; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1290 ; AVX2-SLOW-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   1291 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1]
   1292 ; AVX2-SLOW-NEXT:    retq
   1293 ;
   1294 ; AVX2-FAST-LABEL: shuffle_v4i32_zz4z:
   1295 ; AVX2-FAST:       # %bb.0:
   1296 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
   1297 ; AVX2-FAST-NEXT:    retq
   1298 ;
   1299 ; AVX512VL-LABEL: shuffle_v4i32_zz4z:
   1300 ; AVX512VL:       # %bb.0:
   1301 ; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
   1302 ; AVX512VL-NEXT:    retq
   1303   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
   1304   ret <4 x i32> %shuffle
   1305 }
   1306 
   1307 define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
        ; Integer variant. Shuffles zeroinitializer with %a using mask
        ; <0,undef,undef,4>, so the result is <0, undef, undef, a[0]>. Every SSE
        ; and AVX run expects a single byte shift, (v)pslldq, whose decoded
        ; result is twelve zero bytes followed by bytes 0-3 of the input
        ; register; the undef lanes simply end up zero.
   1308 ; SSE-LABEL: shuffle_v4i32_zuu4:
   1309 ; SSE:       # %bb.0:
   1310 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
   1311 ; SSE-NEXT:    retq
   1312 ;
   1313 ; AVX-LABEL: shuffle_v4i32_zuu4:
   1314 ; AVX:       # %bb.0:
   1315 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
   1316 ; AVX-NEXT:    retq
   1317   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
   1318   ret <4 x i32> %shuffle
   1319 }
   1320 
   1321 define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) {
        ; Integer variant. Shuffles zeroinitializer with %a using mask <0,6,2,3>,
        ; so the result is <0, a[2], 0, 0>. Expected lowerings differ per run
        ;  - SSE2, SSE3, SSSE3 expect xorps followed by two shufps
        ;  - SSE4.1 expects pshufd, pxor, then pblendw
        ;  - AVX1 and AVX2 without fast-variable-shuffle expect vpermilps, vxorps,
        ;    then vblendps
        ;  - the AVX2 and AVX512VL runs with fast-variable-shuffle expect a single
        ;    vpshufb selecting bytes 8-11 into lane 1 and zeroing the rest
   1322 ; SSE2-LABEL: shuffle_v4i32_z6zz:
   1323 ; SSE2:       # %bb.0:
   1324 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   1325 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
   1326 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
   1327 ; SSE2-NEXT:    retq
   1328 ;
   1329 ; SSE3-LABEL: shuffle_v4i32_z6zz:
   1330 ; SSE3:       # %bb.0:
   1331 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   1332 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
   1333 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
   1334 ; SSE3-NEXT:    retq
   1335 ;
   1336 ; SSSE3-LABEL: shuffle_v4i32_z6zz:
   1337 ; SSSE3:       # %bb.0:
   1338 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   1339 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
   1340 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
   1341 ; SSSE3-NEXT:    retq
   1342 ;
   1343 ; SSE41-LABEL: shuffle_v4i32_z6zz:
   1344 ; SSE41:       # %bb.0:
   1345 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
   1346 ; SSE41-NEXT:    pxor %xmm0, %xmm0
   1347 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
   1348 ; SSE41-NEXT:    retq
   1349 ;
   1350 ; AVX1-LABEL: shuffle_v4i32_z6zz:
   1351 ; AVX1:       # %bb.0:
   1352 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1353 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1354 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
   1355 ; AVX1-NEXT:    retq
   1356 ;
   1357 ; AVX2-SLOW-LABEL: shuffle_v4i32_z6zz:
   1358 ; AVX2-SLOW:       # %bb.0:
   1359 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1360 ; AVX2-SLOW-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1361 ; AVX2-SLOW-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
   1362 ; AVX2-SLOW-NEXT:    retq
   1363 ;
   1364 ; AVX2-FAST-LABEL: shuffle_v4i32_z6zz:
   1365 ; AVX2-FAST:       # %bb.0:
   1366 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero
   1367 ; AVX2-FAST-NEXT:    retq
   1368 ;
   1369 ; AVX512VL-LABEL: shuffle_v4i32_z6zz:
   1370 ; AVX512VL:       # %bb.0:
   1371 ; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero
   1372 ; AVX512VL-NEXT:    retq
   1373   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
   1374   ret <4 x i32> %shuffle
   1375 }
   1376 
   1377 define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
        ; Shuffles %a with %b using mask <7,0,1,2>, so the result is
        ; <b[3], a[0], a[1], a[2]>, a rotation across the two inputs by one
        ; element. The SSE2 and SSE3 runs expect two shufps plus a movaps; the
        ; SSSE3, SSE4.1 and AVX runs expect a single (v)palignr.
   1378 ; SSE2-LABEL: shuffle_v4i32_7012:
   1379 ; SSE2:       # %bb.0:
   1380 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
   1381 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
   1382 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1383 ; SSE2-NEXT:    retq
   1384 ;
   1385 ; SSE3-LABEL: shuffle_v4i32_7012:
   1386 ; SSE3:       # %bb.0:
   1387 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
   1388 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
   1389 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1390 ; SSE3-NEXT:    retq
   1391 ;
   1392 ; SSSE3-LABEL: shuffle_v4i32_7012:
   1393 ; SSSE3:       # %bb.0:
   1394 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
   1395 ; SSSE3-NEXT:    retq
   1396 ;
   1397 ; SSE41-LABEL: shuffle_v4i32_7012:
   1398 ; SSE41:       # %bb.0:
   1399 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
   1400 ; SSE41-NEXT:    retq
   1401 ;
   1402 ; AVX-LABEL: shuffle_v4i32_7012:
   1403 ; AVX:       # %bb.0:
   1404 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
   1405 ; AVX-NEXT:    retq
   1406   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
   1407   ret <4 x i32> %shuffle
   1408 }
   1409 
   1410 define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
        ; Shuffles %a with %b using mask <6,7,0,1>, so the result is
        ; <b[2], b[3], a[0], a[1]>, i.e. the high half of %b followed by the low
        ; half of %a. The SSE2 and SSE3 runs expect a 64-bit shufpd plus a
        ; movapd; the SSSE3, SSE4.1 and AVX runs expect a single (v)palignr.
   1411 ; SSE2-LABEL: shuffle_v4i32_6701:
   1412 ; SSE2:       # %bb.0:
   1413 ; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
   1414 ; SSE2-NEXT:    movapd %xmm1, %xmm0
   1415 ; SSE2-NEXT:    retq
   1416 ;
   1417 ; SSE3-LABEL: shuffle_v4i32_6701:
   1418 ; SSE3:       # %bb.0:
   1419 ; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
   1420 ; SSE3-NEXT:    movapd %xmm1, %xmm0
   1421 ; SSE3-NEXT:    retq
   1422 ;
   1423 ; SSSE3-LABEL: shuffle_v4i32_6701:
   1424 ; SSSE3:       # %bb.0:
   1425 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
   1426 ; SSSE3-NEXT:    retq
   1427 ;
   1428 ; SSE41-LABEL: shuffle_v4i32_6701:
   1429 ; SSE41:       # %bb.0:
   1430 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
   1431 ; SSE41-NEXT:    retq
   1432 ;
   1433 ; AVX-LABEL: shuffle_v4i32_6701:
   1434 ; AVX:       # %bb.0:
   1435 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
   1436 ; AVX-NEXT:    retq
   1437   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
   1438   ret <4 x i32> %shuffle
   1439 }
   1440 
   1441 define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
        ; Shuffles %a with %b using mask <5,6,7,0>, so the result is
        ; <b[1], b[2], b[3], a[0]>, a rotation across the two inputs by three
        ; elements. The SSE2 and SSE3 runs expect two shufps plus a movaps; the
        ; SSSE3, SSE4.1 and AVX runs expect a single (v)palignr.
   1442 ; SSE2-LABEL: shuffle_v4i32_5670:
   1443 ; SSE2:       # %bb.0:
   1444 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
   1445 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
   1446 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1447 ; SSE2-NEXT:    retq
   1448 ;
   1449 ; SSE3-LABEL: shuffle_v4i32_5670:
   1450 ; SSE3:       # %bb.0:
   1451 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
   1452 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
   1453 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1454 ; SSE3-NEXT:    retq
   1455 ;
   1456 ; SSSE3-LABEL: shuffle_v4i32_5670:
   1457 ; SSSE3:       # %bb.0:
   1458 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
   1459 ; SSSE3-NEXT:    retq
   1460 ;
   1461 ; SSE41-LABEL: shuffle_v4i32_5670:
   1462 ; SSE41:       # %bb.0:
   1463 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
   1464 ; SSE41-NEXT:    retq
   1465 ;
   1466 ; AVX-LABEL: shuffle_v4i32_5670:
   1467 ; AVX:       # %bb.0:
   1468 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
   1469 ; AVX-NEXT:    retq
   1470   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
   1471   ret <4 x i32> %shuffle
   1472 }
   1473 
   1474 define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
   1475 ; SSE2-LABEL: shuffle_v4i32_1234:
   1476 ; SSE2:       # %bb.0:
   1477 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
   1478 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
   1479 ; SSE2-NEXT:    retq
   1480 ;
   1481 ; SSE3-LABEL: shuffle_v4i32_1234:
   1482 ; SSE3:       # %bb.0:
   1483 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
   1484 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
   1485 ; SSE3-NEXT:    retq
   1486 ;
   1487 ; SSSE3-LABEL: shuffle_v4i32_1234:
   1488 ; SSSE3:       # %bb.0:
   1489 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
   1490 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1491 ; SSSE3-NEXT:    retq
   1492 ;
   1493 ; SSE41-LABEL: shuffle_v4i32_1234:
   1494 ; SSE41:       # %bb.0:
   1495 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
   1496 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1497 ; SSE41-NEXT:    retq
   1498 ;
   1499 ; AVX-LABEL: shuffle_v4i32_1234:
   1500 ; AVX:       # %bb.0:
   1501 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
   1502 ; AVX-NEXT:    retq
        ; Two-input shuffle with mask <1,2,3,4>, i.e. the result is
        ; { a[1], a[2], a[3], b[0] }. The assertions above expect two shufps before
        ; SSSE3; from SSSE3 on a palignr is expected, with the non-VEX form writing
        ; xmm1 and therefore followed by a movdqa into xmm0.
   1503   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   1504   ret <4 x i32> %shuffle
   1505 }
   1506 
   1507 define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
   1508 ; SSE2-LABEL: shuffle_v4i32_2345:
   1509 ; SSE2:       # %bb.0:
   1510 ; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
   1511 ; SSE2-NEXT:    retq
   1512 ;
   1513 ; SSE3-LABEL: shuffle_v4i32_2345:
   1514 ; SSE3:       # %bb.0:
   1515 ; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
   1516 ; SSE3-NEXT:    retq
   1517 ;
   1518 ; SSSE3-LABEL: shuffle_v4i32_2345:
   1519 ; SSSE3:       # %bb.0:
   1520 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
   1521 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1522 ; SSSE3-NEXT:    retq
   1523 ;
   1524 ; SSE41-LABEL: shuffle_v4i32_2345:
   1525 ; SSE41:       # %bb.0:
   1526 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
   1527 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1528 ; SSE41-NEXT:    retq
   1529 ;
   1530 ; AVX-LABEL: shuffle_v4i32_2345:
   1531 ; AVX:       # %bb.0:
   1532 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
   1533 ; AVX-NEXT:    retq
        ; Two-input shuffle with mask <2,3,4,5>, i.e. the result is
        ; { a[2], a[3], b[0], b[1] }. The assertions above expect a single shufpd
        ; before SSSE3 and a palignr (plus movdqa in the non-VEX form) from SSSE3 on.
   1534   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
   1535   ret <4 x i32> %shuffle
   1536 }
   1537 
   1538 ; PR22391
   1539 define <4 x i32> @shuffle_v4i32_2456(<4 x i32> %a, <4 x i32> %b) {
   1540 ; SSE2-LABEL: shuffle_v4i32_2456:
   1541 ; SSE2:       # %bb.0:
   1542 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
   1543 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
   1544 ; SSE2-NEXT:    retq
   1545 ;
   1546 ; SSE3-LABEL: shuffle_v4i32_2456:
   1547 ; SSE3:       # %bb.0:
   1548 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
   1549 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
   1550 ; SSE3-NEXT:    retq
   1551 ;
   1552 ; SSSE3-LABEL: shuffle_v4i32_2456:
   1553 ; SSSE3:       # %bb.0:
   1554 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
   1555 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
   1556 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1557 ; SSSE3-NEXT:    retq
   1558 ;
   1559 ; SSE41-LABEL: shuffle_v4i32_2456:
   1560 ; SSE41:       # %bb.0:
   1561 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
   1562 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
   1563 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1564 ; SSE41-NEXT:    retq
   1565 ;
   1566 ; AVX-LABEL: shuffle_v4i32_2456:
   1567 ; AVX:       # %bb.0:
   1568 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
   1569 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
   1570 ; AVX-NEXT:    retq
        ; Chain of two shuffles. %s1 copies a[2] into lane 3 of %a, then %s2 picks
        ; <3,4,5,6> from (%s1, %b), so the net result is { a[2], b[0], b[1], b[2] }.
        ; The assertions above expect two shufps before SSSE3 and a pshufd followed
        ; by palignr from SSSE3 on.
   1571   %s1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
   1572   %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   1573   ret <4 x i32> %s2
   1574 }
   1575 
   1576 define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) {
   1577 ; SSE-LABEL: shuffle_v4i32_40u1:
   1578 ; SSE:       # %bb.0:
   1579 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
   1580 ; SSE-NEXT:    movaps %xmm1, %xmm0
   1581 ; SSE-NEXT:    retq
   1582 ;
   1583 ; AVX-LABEL: shuffle_v4i32_40u1:
   1584 ; AVX:       # %bb.0:
   1585 ; AVX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
   1586 ; AVX-NEXT:    retq
        ; Mask <4,0,undef,1> gives { b[0], a[0], undef, a[1] }. The assertions above
        ; expect the undef lane to be filled with b[1] so that the whole shuffle
        ; becomes a low unpack of (%b, %a).
   1587   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1>
   1588   ret <4 x i32> %shuffle
   1589 }
   1590 
   1591 define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
   1592 ; SSE2-LABEL: shuffle_v4i32_3456:
   1593 ; SSE2:       # %bb.0:
   1594 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
   1595 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
   1596 ; SSE2-NEXT:    retq
   1597 ;
   1598 ; SSE3-LABEL: shuffle_v4i32_3456:
   1599 ; SSE3:       # %bb.0:
   1600 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
   1601 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
   1602 ; SSE3-NEXT:    retq
   1603 ;
   1604 ; SSSE3-LABEL: shuffle_v4i32_3456:
   1605 ; SSSE3:       # %bb.0:
   1606 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
   1607 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1608 ; SSSE3-NEXT:    retq
   1609 ;
   1610 ; SSE41-LABEL: shuffle_v4i32_3456:
   1611 ; SSE41:       # %bb.0:
   1612 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
   1613 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1614 ; SSE41-NEXT:    retq
   1615 ;
   1616 ; AVX-LABEL: shuffle_v4i32_3456:
   1617 ; AVX:       # %bb.0:
   1618 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
   1619 ; AVX-NEXT:    retq
        ; Two-input shuffle with mask <3,4,5,6>, i.e. the result is
        ; { a[3], b[0], b[1], b[2] }. The assertions above expect two shufps before
        ; SSSE3 and a palignr (plus movdqa in the non-VEX form) from SSSE3 on.
   1620   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   1621   ret <4 x i32> %shuffle
   1622 }
   1623 
   1624 define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
   1625 ; SSE2-LABEL: shuffle_v4i32_0u1u:
   1626 ; SSE2:       # %bb.0:
   1627 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
   1628 ; SSE2-NEXT:    retq
   1629 ;
   1630 ; SSE3-LABEL: shuffle_v4i32_0u1u:
   1631 ; SSE3:       # %bb.0:
   1632 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
   1633 ; SSE3-NEXT:    retq
   1634 ;
   1635 ; SSSE3-LABEL: shuffle_v4i32_0u1u:
   1636 ; SSSE3:       # %bb.0:
   1637 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
   1638 ; SSSE3-NEXT:    retq
   1639 ;
   1640 ; SSE41-LABEL: shuffle_v4i32_0u1u:
   1641 ; SSE41:       # %bb.0:
   1642 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1643 ; SSE41-NEXT:    retq
   1644 ;
   1645 ; AVX-LABEL: shuffle_v4i32_0u1u:
   1646 ; AVX:       # %bb.0:
   1647 ; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1648 ; AVX-NEXT:    retq
        ; Mask <0,undef,1,undef> gives { a[0], undef, a[1], undef }; no lane of %b is
        ; selected. The assertions above expect a pshufd before SSE4.1 and a
        ; pmovzxdq (undef lanes become zero) on SSE4.1 and AVX.
   1649   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
   1650   ret <4 x i32> %shuffle
   1651 }
   1652 
   1653 define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
   1654 ; SSE2-LABEL: shuffle_v4i32_0z1z:
   1655 ; SSE2:       # %bb.0:
   1656 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   1657 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1658 ; SSE2-NEXT:    retq
   1659 ;
   1660 ; SSE3-LABEL: shuffle_v4i32_0z1z:
   1661 ; SSE3:       # %bb.0:
   1662 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   1663 ; SSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1664 ; SSE3-NEXT:    retq
   1665 ;
   1666 ; SSSE3-LABEL: shuffle_v4i32_0z1z:
   1667 ; SSSE3:       # %bb.0:
   1668 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   1669 ; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1670 ; SSSE3-NEXT:    retq
   1671 ;
   1672 ; SSE41-LABEL: shuffle_v4i32_0z1z:
   1673 ; SSE41:       # %bb.0:
   1674 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1675 ; SSE41-NEXT:    retq
   1676 ;
   1677 ; AVX-LABEL: shuffle_v4i32_0z1z:
   1678 ; AVX:       # %bb.0:
   1679 ; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1680 ; AVX-NEXT:    retq
        ; Shuffle of %a with an all-zero vector using mask <0,5,1,7>, i.e. the result
        ; is { a[0], 0, a[1], 0 }. The assertions above expect xorps + unpcklps
        ; before SSE4.1 and a single pmovzxdq on SSE4.1 and AVX.
   1681   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
   1682   ret <4 x i32> %shuffle
   1683 }
   1684 
   1685 define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) {
   1686 ; SSE-LABEL: shuffle_v4i32_01zu:
   1687 ; SSE:       # %bb.0:
   1688 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
   1689 ; SSE-NEXT:    retq
   1690 ;
   1691 ; AVX-LABEL: shuffle_v4i32_01zu:
   1692 ; AVX:       # %bb.0:
   1693 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
   1694 ; AVX-NEXT:    retq
        ; Shuffle of %a with an all-zero vector using mask <0,1,7,undef>, i.e. the
        ; result is { a[0], a[1], 0, undef }. The assertions above expect a single
        ; movq that keeps the low 64 bits and zeroes the rest.
   1695   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef>
   1696   ret <4 x i32> %shuffle
   1697 }
   1698 
   1699 define <4 x i32> @shuffle_v4i32_0z23(<4 x i32> %a) {
   1700 ; SSE2-LABEL: shuffle_v4i32_0z23:
   1701 ; SSE2:       # %bb.0:
   1702 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   1703 ; SSE2-NEXT:    retq
   1704 ;
   1705 ; SSE3-LABEL: shuffle_v4i32_0z23:
   1706 ; SSE3:       # %bb.0:
   1707 ; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1708 ; SSE3-NEXT:    retq
   1709 ;
   1710 ; SSSE3-LABEL: shuffle_v4i32_0z23:
   1711 ; SSSE3:       # %bb.0:
   1712 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1713 ; SSSE3-NEXT:    retq
   1714 ;
   1715 ; SSE41-LABEL: shuffle_v4i32_0z23:
   1716 ; SSE41:       # %bb.0:
   1717 ; SSE41-NEXT:    xorps %xmm1, %xmm1
   1718 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
   1719 ; SSE41-NEXT:    retq
   1720 ;
   1721 ; AVX-LABEL: shuffle_v4i32_0z23:
   1722 ; AVX:       # %bb.0:
   1723 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1724 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
   1725 ; AVX-NEXT:    retq
        ; Zeroes lane 1 of %a. Mask <0,4,2,3> against an all-zero vector gives
        ; { a[0], 0, a[2], a[3] }. The assertions above expect an andps with a
        ; RIP-relative memory operand before SSE4.1 and a blend with a zeroed
        ; register on SSE4.1 and AVX.
   1726   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
   1727   ret <4 x i32> %shuffle
   1728 }
   1729 
   1730 define <4 x i32> @shuffle_v4i32_01z3(<4 x i32> %a) {
   1731 ; SSE2-LABEL: shuffle_v4i32_01z3:
   1732 ; SSE2:       # %bb.0:
   1733 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   1734 ; SSE2-NEXT:    retq
   1735 ;
   1736 ; SSE3-LABEL: shuffle_v4i32_01z3:
   1737 ; SSE3:       # %bb.0:
   1738 ; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1739 ; SSE3-NEXT:    retq
   1740 ;
   1741 ; SSSE3-LABEL: shuffle_v4i32_01z3:
   1742 ; SSSE3:       # %bb.0:
   1743 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1744 ; SSSE3-NEXT:    retq
   1745 ;
   1746 ; SSE41-LABEL: shuffle_v4i32_01z3:
   1747 ; SSE41:       # %bb.0:
   1748 ; SSE41-NEXT:    xorps %xmm1, %xmm1
   1749 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
   1750 ; SSE41-NEXT:    retq
   1751 ;
   1752 ; AVX-LABEL: shuffle_v4i32_01z3:
   1753 ; AVX:       # %bb.0:
   1754 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1755 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
   1756 ; AVX-NEXT:    retq
        ; Zeroes lane 2 of %a. Mask <0,1,4,3> against an all-zero vector gives
        ; { a[0], a[1], 0, a[3] }. The assertions above expect an andps with a
        ; RIP-relative memory operand before SSE4.1 and a blend with a zeroed
        ; register on SSE4.1 and AVX.
   1757   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
   1758   ret <4 x i32> %shuffle
   1759 }
   1760 
   1761 define <4 x i32> @shuffle_v4i32_012z(<4 x i32> %a) {
   1762 ; SSE2-LABEL: shuffle_v4i32_012z:
   1763 ; SSE2:       # %bb.0:
   1764 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   1765 ; SSE2-NEXT:    retq
   1766 ;
   1767 ; SSE3-LABEL: shuffle_v4i32_012z:
   1768 ; SSE3:       # %bb.0:
   1769 ; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1770 ; SSE3-NEXT:    retq
   1771 ;
   1772 ; SSSE3-LABEL: shuffle_v4i32_012z:
   1773 ; SSSE3:       # %bb.0:
   1774 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1775 ; SSSE3-NEXT:    retq
   1776 ;
   1777 ; SSE41-LABEL: shuffle_v4i32_012z:
   1778 ; SSE41:       # %bb.0:
   1779 ; SSE41-NEXT:    xorps %xmm1, %xmm1
   1780 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
   1781 ; SSE41-NEXT:    retq
   1782 ;
   1783 ; AVX-LABEL: shuffle_v4i32_012z:
   1784 ; AVX:       # %bb.0:
   1785 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1786 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
   1787 ; AVX-NEXT:    retq
        ; Zeroes lane 3 of %a. Mask <0,1,2,7> against an all-zero vector gives
        ; { a[0], a[1], a[2], 0 }. The assertions above expect an andps with a
        ; RIP-relative memory operand before SSE4.1 and a blend with a zeroed
        ; register on SSE4.1 and AVX.
   1788   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
   1789   ret <4 x i32> %shuffle
   1790 }
   1791 
   1792 define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) {
   1793 ; SSE2-LABEL: shuffle_v4i32_0zz3:
   1794 ; SSE2:       # %bb.0:
   1795 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   1796 ; SSE2-NEXT:    retq
   1797 ;
   1798 ; SSE3-LABEL: shuffle_v4i32_0zz3:
   1799 ; SSE3:       # %bb.0:
   1800 ; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1801 ; SSE3-NEXT:    retq
   1802 ;
   1803 ; SSSE3-LABEL: shuffle_v4i32_0zz3:
   1804 ; SSSE3:       # %bb.0:
   1805 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   1806 ; SSSE3-NEXT:    retq
   1807 ;
   1808 ; SSE41-LABEL: shuffle_v4i32_0zz3:
   1809 ; SSE41:       # %bb.0:
   1810 ; SSE41-NEXT:    xorps %xmm1, %xmm1
   1811 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
   1812 ; SSE41-NEXT:    retq
   1813 ;
   1814 ; AVX-LABEL: shuffle_v4i32_0zz3:
   1815 ; AVX:       # %bb.0:
   1816 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1817 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
   1818 ; AVX-NEXT:    retq
        ; Zeroes lanes 1 and 2 of %a. Mask <0,4,4,3> against an all-zero vector gives
        ; { a[0], 0, 0, a[3] }. The assertions above expect an andps with a
        ; RIP-relative memory operand before SSE4.1 and a blend with a zeroed
        ; register on SSE4.1 and AVX.
   1819   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
   1820   ret <4 x i32> %shuffle
   1821 }
   1822 
   1823 define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
   1824 ; SSE-LABEL: shuffle_v4i32_bitcast_0415:
   1825 ; SSE:       # %bb.0:
   1826 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1827 ; SSE-NEXT:    retq
   1828 ;
   1829 ; AVX-LABEL: shuffle_v4i32_bitcast_0415:
   1830 ; AVX:       # %bb.0:
   1831 ; AVX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1832 ; AVX-NEXT:    retq
        ; Shuffle chain that crosses a bitcast. The i32 shuffle produces
        ; { a[1], b[1], a[0], b[0] }; the <2 x double> shuffle then swaps the two
        ; 64-bit halves, so the net result is { a[0], b[0], a[1], b[1] }. The
        ; assertions above expect both shuffles to merge into one low unpack.
   1833   %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
   1834   %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
   1835   %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   1836   %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
   1837   ret <4 x i32> %bitcast32
   1838 }
   1839 
   1840 define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) {
   1841 ; SSE-LABEL: shuffle_v4f32_bitcast_4401:
   1842 ; SSE:       # %bb.0:
   1843 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,1]
   1844 ; SSE-NEXT:    movaps %xmm1, %xmm0
   1845 ; SSE-NEXT:    retq
   1846 ;
   1847 ; AVX1OR2-LABEL: shuffle_v4f32_bitcast_4401:
   1848 ; AVX1OR2:       # %bb.0:
   1849 ; AVX1OR2-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,0],xmm0[0,1]
   1850 ; AVX1OR2-NEXT:    retq
   1851 ;
   1852 ; AVX512VL-LABEL: shuffle_v4f32_bitcast_4401:
   1853 ; AVX512VL:       # %bb.0:
   1854 ; AVX512VL-NEXT:    vbroadcastss %xmm1, %xmm1
   1855 ; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
   1856 ; AVX512VL-NEXT:    retq
        ; Shuffle chain through <2 x double> bitcasts. %1 is { b[0], b[0], b[1], b[1] };
        ; %4 joins the low 64 bits of %1 with the low 64 bits of %a, so the net
        ; result is { b[0], b[0], a[0], a[1] }. The assertions above expect a single
        ; shufps without AVX-512 and a broadcast plus vmovlhps with AVX512VL.
   1857   %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
   1858   %2 = bitcast <4 x i32> %1 to <2 x double>
   1859   %3 = bitcast <4 x float> %a to <2 x double>
   1860   %4 = shufflevector <2 x double> %2, <2 x double> %3, <2 x i32> <i32 0, i32 2>
   1861   %5 = bitcast <2 x double> %4 to <4 x float>
   1862   ret <4 x float> %5
   1863 }
   1864 
   1865 define <4 x float> @shuffle_v4f32_bitcast_0045(<4 x float> %a, <4 x i32> %b) {
   1866 ; SSE-LABEL: shuffle_v4f32_bitcast_0045:
   1867 ; SSE:       # %bb.0:
   1868 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
   1869 ; SSE-NEXT:    retq
   1870 ;
   1871 ; AVX-LABEL: shuffle_v4f32_bitcast_0045:
   1872 ; AVX:       # %bb.0:
   1873 ; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
   1874 ; AVX-NEXT:    retq
        ; %1 is { a[0], a[0], a[1], a[1] }; the second shuffle picks lanes 1 and 0 of
        ; %1 (both a[0]) followed by lanes 0 and 1 of the bitcast %b, so the net
        ; result is { a[0], a[0], b[0], b[1] }. The assertions above expect a single
        ; shufps.
   1875   %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
   1876   %2 = bitcast <4 x i32> %b to <4 x float>
   1877   %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 5>
   1878   ret <4 x float> %3
   1879 }
   1880 
   1881 define <4 x float> @mask_v4f32_4127(<4 x float> %a, <4 x float> %b) {
   1882 ; SSE2-LABEL: mask_v4f32_4127:
   1883 ; SSE2:       # %bb.0:
   1884 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
   1885 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
   1886 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1887 ; SSE2-NEXT:    retq
   1888 ;
   1889 ; SSE3-LABEL: mask_v4f32_4127:
   1890 ; SSE3:       # %bb.0:
   1891 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
   1892 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
   1893 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1894 ; SSE3-NEXT:    retq
   1895 ;
   1896 ; SSSE3-LABEL: mask_v4f32_4127:
   1897 ; SSSE3:       # %bb.0:
   1898 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2]
   1899 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
   1900 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   1901 ; SSSE3-NEXT:    retq
   1902 ;
   1903 ; SSE41-LABEL: mask_v4f32_4127:
   1904 ; SSE41:       # %bb.0:
   1905 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
   1906 ; SSE41-NEXT:    retq
   1907 ;
   1908 ; AVX-LABEL: mask_v4f32_4127:
   1909 ; AVX:       # %bb.0:
   1910 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
   1911 ; AVX-NEXT:    retq
        ; Lane select written as and/and/or with complementary constant masks. Lanes
        ; 1 and 2 are kept from %a and lanes 0 and 3 from %b, giving
        ; { b[0], a[1], a[2], b[3] }. The assertions above expect this to be
        ; recognised as a shuffle, with a single blend on SSE4.1 and AVX and two
        ; shufps plus a copy on earlier targets.
   1912   %1 = bitcast <4 x float> %a to <4 x i32>
   1913   %2 = bitcast <4 x float> %b to <4 x i32>
   1914   %3 = and <4 x i32> %1, <i32  0, i32 -1, i32 -1, i32  0>
   1915   %4 = and <4 x i32> %2, <i32 -1, i32  0, i32  0, i32 -1>
   1916   %5 = or <4 x i32> %4, %3
   1917   %6 = bitcast <4 x i32> %5 to <4 x float>
   1918   ret <4 x float> %6
   1919 }
   1920 
   1921 define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) {
   1922 ; SSE2-LABEL: mask_v4f32_0127:
   1923 ; SSE2:       # %bb.0:
   1924 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   1925 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   1926 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1927 ; SSE2-NEXT:    retq
   1928 ;
   1929 ; SSE3-LABEL: mask_v4f32_0127:
   1930 ; SSE3:       # %bb.0:
   1931 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   1932 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   1933 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1934 ; SSE3-NEXT:    retq
   1935 ;
   1936 ; SSSE3-LABEL: mask_v4f32_0127:
   1937 ; SSSE3:       # %bb.0:
   1938 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   1939 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   1940 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   1941 ; SSSE3-NEXT:    retq
   1942 ;
   1943 ; SSE41-LABEL: mask_v4f32_0127:
   1944 ; SSE41:       # %bb.0:
   1945 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
   1946 ; SSE41-NEXT:    retq
   1947 ;
   1948 ; AVX-LABEL: mask_v4f32_0127:
   1949 ; AVX:       # %bb.0:
   1950 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
   1951 ; AVX-NEXT:    retq
        ; Lane select written as and/and/or on <2 x i64> with complementary masks.
        ; -4294967296 is 0xFFFFFFFF00000000, so only 32-bit lane 3 of %a survives,
        ; while %b keeps lanes 0 to 2. The result is { b[0], b[1], b[2], a[3] }.
        ; NOTE(review) the 0127 in the name appears to number the lanes of %b first;
        ; confirm against the naming used elsewhere in this file.
   1952   %1 = bitcast <4 x float> %a to <2 x i64>
   1953   %2 = bitcast <4 x float> %b to <2 x i64>
   1954   %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
   1955   %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
   1956   %5 = or <2 x i64> %4, %3
   1957   %6 = bitcast <2 x i64> %5 to <4 x float>
   1958   ret <4 x float> %6
   1959 }
   1960 
   1961 define <4 x i32> @mask_v4i32_0127(<4 x i32> %a, <4 x i32> %b) {
   1962 ; SSE2-LABEL: mask_v4i32_0127:
   1963 ; SSE2:       # %bb.0:
   1964 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   1965 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   1966 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   1967 ; SSE2-NEXT:    retq
   1968 ;
   1969 ; SSE3-LABEL: mask_v4i32_0127:
   1970 ; SSE3:       # %bb.0:
   1971 ; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   1972 ; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   1973 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   1974 ; SSE3-NEXT:    retq
   1975 ;
   1976 ; SSSE3-LABEL: mask_v4i32_0127:
   1977 ; SSSE3:       # %bb.0:
   1978 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   1979 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   1980 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   1981 ; SSSE3-NEXT:    retq
   1982 ;
   1983 ; SSE41-LABEL: mask_v4i32_0127:
   1984 ; SSE41:       # %bb.0:
   1985 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
   1986 ; SSE41-NEXT:    retq
   1987 ;
   1988 ; AVX-LABEL: mask_v4i32_0127:
   1989 ; AVX:       # %bb.0:
   1990 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
   1991 ; AVX-NEXT:    retq
        ; Integer-typed variant of the <2 x i64> and/and/or lane select.
        ; -4294967296 is 0xFFFFFFFF00000000, so only 32-bit lane 3 of %a survives,
        ; while %b keeps lanes 0 to 2. The result is { b[0], b[1], b[2], a[3] }.
        ; NOTE(review) the 0127 in the name appears to number the lanes of %b first;
        ; confirm against the naming used elsewhere in this file.
   1992   %1 = bitcast <4 x i32> %a to <2 x i64>
   1993   %2 = bitcast <4 x i32> %b to <2 x i64>
   1994   %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
   1995   %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
   1996   %5 = or <2 x i64> %4, %3
   1997   %6 = bitcast <2 x i64> %5 to <4 x i32>
   1998   ret <4 x i32> %6
   1999 }
   2000 
   2001 define <4 x float> @broadcast_v4f32_0101_from_v2f32(<2 x float>* %x) {
   2002 ; SSE2-LABEL: broadcast_v4f32_0101_from_v2f32:
   2003 ; SSE2:       # %bb.0:
   2004 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
   2005 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2006 ; SSE2-NEXT:    retq
   2007 ;
   2008 ; SSE3-LABEL: broadcast_v4f32_0101_from_v2f32:
   2009 ; SSE3:       # %bb.0:
   2010 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   2011 ; SSE3-NEXT:    retq
   2012 ;
   2013 ; SSSE3-LABEL: broadcast_v4f32_0101_from_v2f32:
   2014 ; SSSE3:       # %bb.0:
   2015 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   2016 ; SSSE3-NEXT:    retq
   2017 ;
   2018 ; SSE41-LABEL: broadcast_v4f32_0101_from_v2f32:
   2019 ; SSE41:       # %bb.0:
   2020 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   2021 ; SSE41-NEXT:    retq
   2022 ;
   2023 ; AVX-LABEL: broadcast_v4f32_0101_from_v2f32:
   2024 ; AVX:       # %bb.0:
   2025 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
   2026 ; AVX-NEXT:    retq
        ; Loads a <2 x float> with alignment 1 and repeats the pair with mask
        ; <0,1,0,1>, giving { x[0], x[1], x[0], x[1] }. The assertions above expect
        ; movq + pshufd on plain SSE2 and a single movddup from memory on SSE3 and
        ; later.
   2027   %1 = load <2 x float>, <2 x float>* %x, align 1
   2028   %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   2029   ret <4 x float> %2
   2030 }
   2031 
   2032 define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
   2033 ; SSE-LABEL: insert_reg_and_zero_v4i32:
   2034 ; SSE:       # %bb.0:
   2035 ; SSE-NEXT:    movd %edi, %xmm0
   2036 ; SSE-NEXT:    retq
   2037 ;
   2038 ; AVX-LABEL: insert_reg_and_zero_v4i32:
   2039 ; AVX:       # %bb.0:
   2040 ; AVX-NEXT:    vmovd %edi, %xmm0
   2041 ; AVX-NEXT:    retq
        ; Places the scalar %a in lane 0 and takes lanes 1 to 3 from an all-zero
        ; vector (mask <0,5,6,7>), giving { a, 0, 0, 0 }. The assertions above expect
        ; this to collapse into a single movd from the integer register.
   2042   %v = insertelement <4 x i32> undef, i32 %a, i32 0
   2043   %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   2044   ret <4 x i32> %shuffle
   2045 }
   2046 
   2047 define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
   2048 ; SSE-LABEL: insert_mem_and_zero_v4i32:
   2049 ; SSE:       # %bb.0:
   2050 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2051 ; SSE-NEXT:    retq
   2052 ;
   2053 ; AVX-LABEL: insert_mem_and_zero_v4i32:
   2054 ; AVX:       # %bb.0:
   2055 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2056 ; AVX-NEXT:    retq
        ; Loads an i32 from %ptr, places it in lane 0 and zeroes lanes 1 to 3 (mask
        ; <0,5,6,7> against an all-zero vector). The assertions above expect the load
        ; and the zeroing to fold into a single movss from memory.
   2057   %a = load i32, i32* %ptr
   2058   %v = insertelement <4 x i32> undef, i32 %a, i32 0
   2059   %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   2060   ret <4 x i32> %shuffle
   2061 }
   2062 
   2063 define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
   2064 ; SSE2-LABEL: insert_reg_and_zero_v4f32:
   2065 ; SSE2:       # %bb.0:
   2066 ; SSE2-NEXT:    xorps %xmm1, %xmm1
   2067 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   2068 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   2069 ; SSE2-NEXT:    retq
   2070 ;
   2071 ; SSE3-LABEL: insert_reg_and_zero_v4f32:
   2072 ; SSE3:       # %bb.0:
   2073 ; SSE3-NEXT:    xorps %xmm1, %xmm1
   2074 ; SSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   2075 ; SSE3-NEXT:    movaps %xmm1, %xmm0
   2076 ; SSE3-NEXT:    retq
   2077 ;
   2078 ; SSSE3-LABEL: insert_reg_and_zero_v4f32:
   2079 ; SSSE3:       # %bb.0:
   2080 ; SSSE3-NEXT:    xorps %xmm1, %xmm1
   2081 ; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
   2082 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   2083 ; SSSE3-NEXT:    retq
   2084 ;
   2085 ; SSE41-LABEL: insert_reg_and_zero_v4f32:
   2086 ; SSE41:       # %bb.0:
   2087 ; SSE41-NEXT:    xorps %xmm1, %xmm1
   2088 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   2089 ; SSE41-NEXT:    retq
   2090 ;
   2091 ; AVX-LABEL: insert_reg_and_zero_v4f32:
   2092 ; AVX:       # %bb.0:
   2093 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2094 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
   2095 ; AVX-NEXT:    retq
        ; Places the scalar float %a in lane 0 and takes lanes 1 to 3 from an
        ; all-zero vector (mask <0,5,6,7>), giving { a, 0, 0, 0 }. The assertions
        ; above expect xorps + movss + a register copy before SSE4.1 and xorps plus
        ; a blend on SSE4.1 and AVX.
   2096   %v = insertelement <4 x float> undef, float %a, i32 0
   2097   %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   2098   ret <4 x float> %shuffle
   2099 }
   2100 
   2101 define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
   2102 ; SSE-LABEL: insert_mem_and_zero_v4f32:
   2103 ; SSE:       # %bb.0:
   2104 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2105 ; SSE-NEXT:    retq
   2106 ;
   2107 ; AVX-LABEL: insert_mem_and_zero_v4f32:
   2108 ; AVX:       # %bb.0:
   2109 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2110 ; AVX-NEXT:    retq
        ; Loads a float from %ptr, places it in lane 0 and zeroes lanes 1 to 3 (mask
        ; <0,5,6,7> against an all-zero vector). The assertions above expect the load
        ; and the zeroing to fold into a single movss from memory.
   2111   %a = load float, float* %ptr
   2112   %v = insertelement <4 x float> undef, float %a, i32 0
   2113   %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   2114   ret <4 x float> %shuffle
   2115 }
   2116 
   2117 define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
   2118 ; SSE2-LABEL: insert_reg_lo_v4i32:
   2119 ; SSE2:       # %bb.0:
   2120 ; SSE2-NEXT:    movq %rdi, %xmm1
   2121 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   2122 ; SSE2-NEXT:    retq
   2123 ;
   2124 ; SSE3-LABEL: insert_reg_lo_v4i32:
   2125 ; SSE3:       # %bb.0:
   2126 ; SSE3-NEXT:    movq %rdi, %xmm1
   2127 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   2128 ; SSE3-NEXT:    retq
   2129 ;
   2130 ; SSSE3-LABEL: insert_reg_lo_v4i32:
   2131 ; SSSE3:       # %bb.0:
   2132 ; SSSE3-NEXT:    movq %rdi, %xmm1
   2133 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   2134 ; SSSE3-NEXT:    retq
   2135 ;
   2136 ; SSE41-LABEL: insert_reg_lo_v4i32:
   2137 ; SSE41:       # %bb.0:
   2138 ; SSE41-NEXT:    movq %rdi, %xmm1
   2139 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   2140 ; SSE41-NEXT:    retq
   2141 ;
   2142 ; AVX1-LABEL: insert_reg_lo_v4i32:
   2143 ; AVX1:       # %bb.0:
   2144 ; AVX1-NEXT:    vmovq %rdi, %xmm1
   2145 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   2146 ; AVX1-NEXT:    retq
   2147 ;
   2148 ; AVX2OR512VL-LABEL: insert_reg_lo_v4i32:
   2149 ; AVX2OR512VL:       # %bb.0:
   2150 ; AVX2OR512VL-NEXT:    vmovq %rdi, %xmm1
   2151 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   2152 ; AVX2OR512VL-NEXT:    retq
        ; Replaces the low two lanes of %b with the two i32 halves of the i64 %a.
        ; %a is bitcast to <2 x i32>, widened to four lanes with undef upper lanes,
        ; then mask <0,1,6,7> keeps b[2] and b[3]. The assertions above expect a movq
        ; from the integer register followed by movsd before SSE4.1, pblendw on
        ; SSE4.1 and AVX1, and vpblendd on AVX2 and AVX512VL.
   2153   %a.cast = bitcast i64 %a to <2 x i32>
   2154   %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   2155   %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   2156   ret <4 x i32> %shuffle
   2157 }
   2158 
   2159 define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
   2160 ; SSE2-LABEL: insert_mem_lo_v4i32:
   2161 ; SSE2:       # %bb.0:
   2162 ; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   2163 ; SSE2-NEXT:    retq
   2164 ;
   2165 ; SSE3-LABEL: insert_mem_lo_v4i32:
   2166 ; SSE3:       # %bb.0:
   2167 ; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   2168 ; SSE3-NEXT:    retq
   2169 ;
   2170 ; SSSE3-LABEL: insert_mem_lo_v4i32:
   2171 ; SSSE3:       # %bb.0:
   2172 ; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   2173 ; SSSE3-NEXT:    retq
   2174 ;
   2175 ; SSE41-LABEL: insert_mem_lo_v4i32:
   2176 ; SSE41:       # %bb.0:
   2177 ; SSE41-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
   2178 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   2179 ; SSE41-NEXT:    retq
   2180 ;
   2181 ; AVX-LABEL: insert_mem_lo_v4i32:
   2182 ; AVX:       # %bb.0:
   2183 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
   2184 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   2185 ; AVX-NEXT:    retq
        ; Replaces the low two lanes of %b with a <2 x i32> loaded from %ptr. The
        ; loaded pair is widened with undef upper lanes, then mask <0,1,6,7> keeps
        ; b[2] and b[3]. The assertions above expect a single movlpd from memory
        ; before SSE4.1 and a movsd load plus blend on SSE4.1 and AVX.
   2186   %a = load <2 x i32>, <2 x i32>* %ptr
   2187   %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   2188   %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   2189   ret <4 x i32> %shuffle
   2190 }
   2191 
   2192 define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
   2193 ; SSE-LABEL: insert_reg_hi_v4i32:
   2194 ; SSE:       # %bb.0:
   2195 ; SSE-NEXT:    movq %rdi, %xmm1
   2196 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   2197 ; SSE-NEXT:    retq
   2198 ;
   2199 ; AVX-LABEL: insert_reg_hi_v4i32:
   2200 ; AVX:       # %bb.0:
   2201 ; AVX-NEXT:    vmovq %rdi, %xmm1
   2202 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   2203 ; AVX-NEXT:    retq
        ; Builds { b[0], b[1], v[0], v[1] } where %v holds the two i32 halves of the
        ; i64 %a (mask <4,5,0,1>), i.e. the low half of %b followed by %a in the
        ; high half. The assertions above expect a movq from the integer register
        ; and a punpcklqdq.
   2204   %a.cast = bitcast i64 %a to <2 x i32>
   2205   %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   2206   %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   2207   ret <4 x i32> %shuffle
   2208 }
   2209 
   2210 define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
   2211 ; SSE-LABEL: insert_mem_hi_v4i32:
   2212 ; SSE:       # %bb.0:
   2213 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
   2214 ; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   2215 ; SSE-NEXT:    retq
   2216 ;
   2217 ; AVX-LABEL: insert_mem_hi_v4i32:
   2218 ; AVX:       # %bb.0:
   2219 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
   2220 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   2221 ; AVX-NEXT:    retq
        ; Builds { b[0], b[1], v[0], v[1] } where %v is the <2 x i32> loaded from
        ; %ptr (mask <4,5,0,1>), i.e. the low half of %b followed by the loaded pair
        ; in the high half. The assertions above expect a movsd load followed by
        ; movlhps.
   2222   %a = load <2 x i32>, <2 x i32>* %ptr
   2223   %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   2224   %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   2225   ret <4 x i32> %shuffle
   2226 }
   2227 
   2228 define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
        ; Places the two float halves of the scalar double %a in result
        ; elements 0-1 and keeps elements 2-3 of %b in result elements 2-3.
        ; Expected codegen differs by feature level: the SSE2, SSE3 and SSSE3
        ; run lines expect movsd plus a movapd copy, while the SSE4.1 and AVX
        ; run lines expect a single (v)blendps.
   2229 ; SSE2-LABEL: insert_reg_lo_v4f32:
   2230 ; SSE2:       # %bb.0:
   2231 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   2232 ; SSE2-NEXT:    movapd %xmm1, %xmm0
   2233 ; SSE2-NEXT:    retq
   2234 ;
   2235 ; SSE3-LABEL: insert_reg_lo_v4f32:
   2236 ; SSE3:       # %bb.0:
   2237 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   2238 ; SSE3-NEXT:    movapd %xmm1, %xmm0
   2239 ; SSE3-NEXT:    retq
   2240 ;
   2241 ; SSSE3-LABEL: insert_reg_lo_v4f32:
   2242 ; SSSE3:       # %bb.0:
   2243 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   2244 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
   2245 ; SSSE3-NEXT:    retq
   2246 ;
   2247 ; SSE41-LABEL: insert_reg_lo_v4f32:
   2248 ; SSE41:       # %bb.0:
   2249 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   2250 ; SSE41-NEXT:    retq
   2251 ;
   2252 ; AVX-LABEL: insert_reg_lo_v4f32:
   2253 ; AVX:       # %bb.0:
   2254 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   2255 ; AVX-NEXT:    retq
          ; Reinterpret the 64-bit scalar as two 32-bit float elements.
   2256   %a.cast = bitcast double %a to <2 x float>
          ; Widen to four elements; elements 2-3 of %v are undef.
   2257   %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
          ; Mask indices 0-1 select %v[0..1]; indices 6-7 select %b[2..3].
   2258   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   2259   ret <4 x float> %shuffle
   2260 }
   2261 
   2262 define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
        ; Memory-operand variant of the "insert into low half" pattern: a
        ; <2 x float> loaded from %ptr lands in result elements 0-1 and
        ; elements 2-3 of %b stay in result elements 2-3.
        ; The run lines expect the load to fold into a single (v)movlpd.
   2263 ; SSE-LABEL: insert_mem_lo_v4f32:
   2264 ; SSE:       # %bb.0:
   2265 ; SSE-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   2266 ; SSE-NEXT:    retq
   2267 ;
   2268 ; AVX-LABEL: insert_mem_lo_v4f32:
   2269 ; AVX:       # %bb.0:
   2270 ; AVX-NEXT:    vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   2271 ; AVX-NEXT:    retq
   2272   %a = load <2 x float>, <2 x float>* %ptr
          ; Widen to four elements; elements 2-3 of %v are undef.
   2273   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
          ; Mask indices 0-1 select %v[0..1]; indices 6-7 select %b[2..3].
   2274   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   2275   ret <4 x float> %shuffle
   2276 }
   2277 
   2278 define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
        ; Places the two float halves of the scalar double %a in result
        ; elements 2-3 and keeps elements 0-1 of %b in result elements 0-1.
        ; The SSE run lines expect movlhps plus a movaps copy into xmm0; the
        ; AVX run lines expect a single three-operand vmovlhps.
   2279 ; SSE-LABEL: insert_reg_hi_v4f32:
   2280 ; SSE:       # %bb.0:
   2281 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
   2282 ; SSE-NEXT:    movaps %xmm1, %xmm0
   2283 ; SSE-NEXT:    retq
   2284 ;
   2285 ; AVX-LABEL: insert_reg_hi_v4f32:
   2286 ; AVX:       # %bb.0:
   2287 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
   2288 ; AVX-NEXT:    retq
          ; Reinterpret the 64-bit scalar as two 32-bit float elements.
   2289   %a.cast = bitcast double %a to <2 x float>
          ; Widen to four elements; elements 2-3 of %v are undef.
   2290   %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
          ; Mask indices 4-5 select %b[0..1]; indices 0-1 select %v[0..1].
   2291   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   2292   ret <4 x float> %shuffle
   2293 }
   2294 
   2295 define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
        ; Memory-operand variant of the "insert into high half" pattern: a
        ; <2 x float> loaded from %ptr lands in result elements 2-3 and
        ; elements 0-1 of %b stay in result elements 0-1.
        ; The run lines expect the load to fold into a single (v)movhpd.
   2296 ; SSE-LABEL: insert_mem_hi_v4f32:
   2297 ; SSE:       # %bb.0:
   2298 ; SSE-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2299 ; SSE-NEXT:    retq
   2300 ;
   2301 ; AVX-LABEL: insert_mem_hi_v4f32:
   2302 ; AVX:       # %bb.0:
   2303 ; AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2304 ; AVX-NEXT:    retq
   2305   %a = load <2 x float>, <2 x float>* %ptr
          ; Widen to four elements; elements 2-3 of %v are undef.
   2306   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
          ; Mask indices 4-5 select %b[0..1]; indices 0-1 select %v[0..1].
   2307   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   2308   ret <4 x float> %shuffle
   2309 }
   2310 
   2311 ; PR21137
   2312 define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
        ; Loads a <4 x float> from %ptr and returns it with its elements in
        ; reverse order (mask 3,2,1,0).
        ; The SSE run lines expect a separate movaps load followed by shufps;
        ; the AVX run lines expect the load folded into vpermilps as a memory
        ; operand.
   2313 ; SSE-LABEL: shuffle_mem_v4f32_3210:
   2314 ; SSE:       # %bb.0:
   2315 ; SSE-NEXT:    movaps (%rdi), %xmm0
   2316 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   2317 ; SSE-NEXT:    retq
   2318 ;
   2319 ; AVX-LABEL: shuffle_mem_v4f32_3210:
   2320 ; AVX:       # %bb.0:
   2321 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
   2322 ; AVX-NEXT:    retq
   2323   %a = load <4 x float>, <4 x float>* %ptr
          ; Single-input shuffle: the second operand is undef and never indexed.
   2324   %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   2325   ret <4 x float> %shuffle
   2326 }
   2327 
   2328 define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) {
        ; Loads one i32 from %ptr and replicates it into all four result
        ; elements.
        ; The SSE run lines expect a movd load followed by pshufd with an
        ; all-zero index pattern; the AVX run lines expect a single
        ; vbroadcastss taken directly from memory.
   2329 ; SSE-LABEL: insert_dup_mem_v4i32:
   2330 ; SSE:       # %bb.0:
   2331 ; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2332 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2333 ; SSE-NEXT:    retq
   2334 ;
   2335 ; AVX-LABEL: insert_dup_mem_v4i32:
   2336 ; AVX:       # %bb.0:
   2337 ; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
   2338 ; AVX-NEXT:    retq
   2339   %tmp = load i32, i32* %ptr, align 4
          ; Put the loaded scalar in element 0 of an otherwise all-zero vector.
   2340   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
          ; A zeroinitializer mask selects element 0 for every result element.
   2341   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
   2342   ret <4 x i32> %tmp2
   2343 }
   2344 
   2345 ;
   2346 ; Shuffle to logical bit shifts
   2347 ;
   2348 
   2349 define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
        ; Shuffles %a against an all-zero vector to produce
        ; [zero, %a[0], zero, undef].
        ; The run lines expect this to lower to a single (v)psllq by 32 bits
        ; rather than a shuffle instruction.
   2350 ; SSE-LABEL: shuffle_v4i32_z0zX:
   2351 ; SSE:       # %bb.0:
   2352 ; SSE-NEXT:    psllq $32, %xmm0
   2353 ; SSE-NEXT:    retq
   2354 ;
   2355 ; AVX-LABEL: shuffle_v4i32_z0zX:
   2356 ; AVX:       # %bb.0:
   2357 ; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
   2358 ; AVX-NEXT:    retq
          ; Mask index 4 selects element 0 of the zero vector; index 0 selects
          ; %a[0]; the last result element is left undef.
   2359   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
   2360   ret <4 x i32> %shuffle
   2361 }
   2362 
   2363 define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
        ; Shuffles %a against an all-zero vector to produce
        ; [%a[1], zero, %a[3], zero].
        ; The run lines expect this to lower to a single (v)psrlq by 32 bits
        ; rather than a shuffle instruction.
   2364 ; SSE-LABEL: shuffle_v4i32_1z3z:
   2365 ; SSE:       # %bb.0:
   2366 ; SSE-NEXT:    psrlq $32, %xmm0
   2367 ; SSE-NEXT:    retq
   2368 ;
   2369 ; AVX-LABEL: shuffle_v4i32_1z3z:
   2370 ; AVX:       # %bb.0:
   2371 ; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
   2372 ; AVX-NEXT:    retq
          ; Mask indices 1 and 3 select from %a; index 4 selects element 0 of
          ; the zero vector.
   2373   %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
   2374   ret <4 x i32> %shuffle
   2375 }
   2376