Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
     10 
     11 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <0,1,0,1,2,3,2,3> only selects lanes of %a (every index is below 8)
        ; and keeps adjacent i16 lanes paired, so each run expects one
        ; dword-granular shuffle with pattern [0,0,1,1]: pshufd on the SSE runs,
        ; vpermilps on the AVX runs.
     12 ; SSE-LABEL: shuffle_v8i16_01012323:
     13 ; SSE:       # %bb.0:
     14 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
     15 ; SSE-NEXT:    retq
     16 ;
     17 ; AVX-LABEL: shuffle_v8i16_01012323:
     18 ; AVX:       # %bb.0:
     19 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
     20 ; AVX-NEXT:    retq
     21   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
     22   ret <8 x i16> %shuffle
     23 }
     24 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <6,7,4,5,2,3,0,1> reverses the order of the four i16 pairs of %a;
        ; no mask index selects from %b. Each run expects a single dword shuffle
        ; with pattern [3,2,1,0] (pshufd on the SSE runs, vpermilps on the AVX
        ; runs).
     25 ; SSE-LABEL: shuffle_v8i16_67452301:
     26 ; SSE:       # %bb.0:
     27 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
     28 ; SSE-NEXT:    retq
     29 ;
     30 ; AVX-LABEL: shuffle_v8i16_67452301:
     31 ; AVX:       # %bb.0:
     32 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
     33 ; AVX-NEXT:    retq
     34   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
     35   ret <8 x i16> %shuffle
     36 }
     37 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle: mask indices 4-7 take the upper four lanes of %a and
        ; indices 8-11 take the lower four lanes of %b (the function name spells
        ; 10 and 11 as A and B). The SSE2 run expects one shufpd; the SSSE3 and
        ; SSE4.1 runs expect palignr into xmm1 followed by a movdqa copy into
        ; xmm0; the AVX runs expect a single three-operand vpalignr.
     38 ; SSE2-LABEL: shuffle_v8i16_456789AB:
     39 ; SSE2:       # %bb.0:
     40 ; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
     41 ; SSE2-NEXT:    retq
     42 ;
     43 ; SSSE3-LABEL: shuffle_v8i16_456789AB:
     44 ; SSSE3:       # %bb.0:
     45 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
     46 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
     47 ; SSSE3-NEXT:    retq
     48 ;
     49 ; SSE41-LABEL: shuffle_v8i16_456789AB:
     50 ; SSE41:       # %bb.0:
     51 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
     52 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
     53 ; SSE41-NEXT:    retq
     54 ;
     55 ; AVX-LABEL: shuffle_v8i16_456789AB:
     56 ; AVX:       # %bb.0:
     57 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
     58 ; AVX-NEXT:    retq
     59   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
     60   ret <8 x i16> %shuffle
     61 }
     62 
     63 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
        ; Splat: all eight result lanes are lane 0 of %a. The SSE runs and the
        ; AVX1 run expect a two-step pshuflw + pshufd sequence (v-prefixed under
        ; the AVX1 prefix); the AVX2 and AVX512VL runs expect a single
        ; vpbroadcastw.
     64 ; SSE-LABEL: shuffle_v8i16_00000000:
     65 ; SSE:       # %bb.0:
     66 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
     67 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
     68 ; SSE-NEXT:    retq
     69 ;
     70 ; AVX1-LABEL: shuffle_v8i16_00000000:
     71 ; AVX1:       # %bb.0:
     72 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
     73 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
     74 ; AVX1-NEXT:    retq
     75 ;
     76 ; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
     77 ; AVX2OR512VL:       # %bb.0:
     78 ; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %xmm0
     79 ; AVX2OR512VL-NEXT:    retq
     80   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     81   ret <8 x i16> %shuffle
     82 }
     83 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
        ; Splats lane 0 of %a across the low four result lanes and lane 4 across
        ; the high four. Runs without +fast-variable-shuffle expect a
        ; pshuflw + pshufhw pair (v-prefixed on AVX); the two runs that enable
        ; +fast-variable-shuffle expect a single vpshufb instead.
     84 ; SSE-LABEL: shuffle_v8i16_00004444:
     85 ; SSE:       # %bb.0:
     86 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     87 ; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
     88 ; SSE-NEXT:    retq
     89 ;
     90 ; AVX1-LABEL: shuffle_v8i16_00004444:
     91 ; AVX1:       # %bb.0:
     92 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     93 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
     94 ; AVX1-NEXT:    retq
     95 ;
     96 ; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
     97 ; AVX2-SLOW:       # %bb.0:
     98 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     99 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    100 ; AVX2-SLOW-NEXT:    retq
    101 ;
    102 ; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
    103 ; AVX2-FAST:       # %bb.0:
    104 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
    105 ; AVX2-FAST-NEXT:    retq
    106 ;
    107 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
    108 ; AVX512VL-SLOW:       # %bb.0:
    109 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    110 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    111 ; AVX512VL-SLOW-NEXT:    retq
    112 ;
    113 ; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
    114 ; AVX512VL-FAST:       # %bb.0:
    115 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
    116 ; AVX512VL-FAST-NEXT:    retq
    117   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    118   ret <8 x i16> %shuffle
    119 }
    120 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
        ; Even result lanes are undef (the "u" in the name); odd lanes take lanes
        ; 0-3 of %a. Each run expects xmm0 unpacked with itself (punpcklwd, or
        ; vpunpcklwd on AVX), which places a duplicate in each undef lane.
    121 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
    122 ; SSE:       # %bb.0:
    123 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    124 ; SSE-NEXT:    retq
    125 ;
    126 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
    127 ; AVX:       # %bb.0:
    128 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    129 ; AVX-NEXT:    retq
    130   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
    131   ret <8 x i16> %shuffle
    132 }
    133 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
        ; Even result lanes are undef (the "u" in the name); odd lanes take lanes
        ; 4-7 of %a. Each run expects xmm0 unpacked with itself from the high
        ; half (punpckhwd, or vpunpckhwd on AVX), which places a duplicate in
        ; each undef lane.
    134 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
    135 ; SSE:       # %bb.0:
    136 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    137 ; SSE-NEXT:    retq
    138 ;
    139 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
    140 ; AVX:       # %bb.0:
    141 ; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    142 ; AVX-NEXT:    retq
    143   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
    144   ret <8 x i16> %shuffle
    145 }
    146 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
        ; Low four lanes of %a are permuted as <3,1,2,0>; the high half swaps its
        ; two i16 pairs (<6,7,4,5>). Runs without +fast-variable-shuffle expect
        ; pshuflw for the low half plus pshufd [0,1,3,2] for the pair swap; the
        ; +fast-variable-shuffle runs expect a single vpshufb.
    147 ; SSE-LABEL: shuffle_v8i16_31206745:
    148 ; SSE:       # %bb.0:
    149 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    150 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    151 ; SSE-NEXT:    retq
    152 ;
    153 ; AVX1-LABEL: shuffle_v8i16_31206745:
    154 ; AVX1:       # %bb.0:
    155 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    156 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    157 ; AVX1-NEXT:    retq
    158 ;
    159 ; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
    160 ; AVX2-SLOW:       # %bb.0:
    161 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    162 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    163 ; AVX2-SLOW-NEXT:    retq
    164 ;
    165 ; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
    166 ; AVX2-FAST:       # %bb.0:
    167 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
    168 ; AVX2-FAST-NEXT:    retq
    169 ;
    170 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
    171 ; AVX512VL-SLOW:       # %bb.0:
    172 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    173 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    174 ; AVX512VL-SLOW-NEXT:    retq
    175 ;
    176 ; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
    177 ; AVX512VL-FAST:       # %bb.0:
    178 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
    179 ; AVX512VL-FAST-NEXT:    retq
    180   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
    181   ret <8 x i16> %shuffle
    182 }
    183 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
        ; Splats lane 4 of %a into the low four result lanes and lane 0 into the
        ; high four. The SSE2 run expects three instructions: pshufd [2,1,0,3]
        ; first exchanges dwords 0 and 2, then pshuflw and pshufhw splat within
        ; each half. The SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    184 ; SSE2-LABEL: shuffle_v8i16_44440000:
    185 ; SSE2:       # %bb.0:
    186 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
    187 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    188 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    189 ; SSE2-NEXT:    retq
    190 ;
    191 ; SSSE3-LABEL: shuffle_v8i16_44440000:
    192 ; SSSE3:       # %bb.0:
    193 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
    194 ; SSSE3-NEXT:    retq
    195 ;
    196 ; SSE41-LABEL: shuffle_v8i16_44440000:
    197 ; SSE41:       # %bb.0:
    198 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
    199 ; SSE41-NEXT:    retq
    200 ;
    201 ; AVX-LABEL: shuffle_v8i16_44440000:
    202 ; AVX:       # %bb.0:
    203 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
    204 ; AVX-NEXT:    retq
    205   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
    206   ret <8 x i16> %shuffle
    207 }
    208 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <2,3,0,1,6,7,4,5> swaps the two i16 pairs inside each 64-bit half
        ; of %a; no mask index selects from %b. Each run expects one dword
        ; shuffle with pattern [1,0,3,2] (pshufd on the SSE runs, vpermilps on
        ; the AVX runs).
    209 ; SSE-LABEL: shuffle_v8i16_23016745:
    210 ; SSE:       # %bb.0:
    211 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
    212 ; SSE-NEXT:    retq
    213 ;
    214 ; AVX-LABEL: shuffle_v8i16_23016745:
    215 ; AVX:       # %bb.0:
    216 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2]
    217 ; AVX-NEXT:    retq
    218   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
    219   ret <8 x i16> %shuffle
    220 }
    221 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
        ; Low half of the result is <2,3,0,2> (lane 2 of %a appears twice, lane 1
        ; is dropped); the high half swaps its two i16 pairs. Runs without
        ; +fast-variable-shuffle expect pshuflw for the low half plus pshufd
        ; [0,1,3,2] for the pair swap; the +fast-variable-shuffle runs expect a
        ; single vpshufb.
    222 ; SSE-LABEL: shuffle_v8i16_23026745:
    223 ; SSE:       # %bb.0:
    224 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
    225 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    226 ; SSE-NEXT:    retq
    227 ;
    228 ; AVX1-LABEL: shuffle_v8i16_23026745:
    229 ; AVX1:       # %bb.0:
    230 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
    231 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    232 ; AVX1-NEXT:    retq
    233 ;
    234 ; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
    235 ; AVX2-SLOW:       # %bb.0:
    236 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
    237 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    238 ; AVX2-SLOW-NEXT:    retq
    239 ;
    240 ; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
    241 ; AVX2-FAST:       # %bb.0:
    242 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
    243 ; AVX2-FAST-NEXT:    retq
    244 ;
    245 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
    246 ; AVX512VL-SLOW:       # %bb.0:
    247 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
    248 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    249 ; AVX512VL-SLOW-NEXT:    retq
    250 ;
    251 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
    252 ; AVX512VL-FAST:       # %bb.0:
    253 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
    254 ; AVX512VL-FAST-NEXT:    retq
    255   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
    256   ret <8 x i16> %shuffle
    257 }
    258 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
        ; Low half swaps its two i16 pairs (<2,3,0,1>); the high half is
        ; <6,7,4,7>, so lane 7 of %a appears twice and lane 5 is dropped. Runs
        ; without +fast-variable-shuffle expect pshufd [1,0,2,3] for the low pair
        ; swap plus pshufhw for the high half; the +fast-variable-shuffle runs
        ; expect a single vpshufb.
    259 ; SSE-LABEL: shuffle_v8i16_23016747:
    260 ; SSE:       # %bb.0:
    261 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    262 ; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
    263 ; SSE-NEXT:    retq
    264 ;
    265 ; AVX1-LABEL: shuffle_v8i16_23016747:
    266 ; AVX1:       # %bb.0:
    267 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    268 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
    269 ; AVX1-NEXT:    retq
    270 ;
    271 ; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
    272 ; AVX2-SLOW:       # %bb.0:
    273 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    274 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
    275 ; AVX2-SLOW-NEXT:    retq
    276 ;
    277 ; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
    278 ; AVX2-FAST:       # %bb.0:
    279 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
    280 ; AVX2-FAST-NEXT:    retq
    281 ;
    282 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
    283 ; AVX512VL-SLOW:       # %bb.0:
    284 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    285 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
    286 ; AVX512VL-SLOW-NEXT:    retq
    287 ;
    288 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
    289 ; AVX512VL-FAST:       # %bb.0:
    290 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
    291 ; AVX512VL-FAST-NEXT:    retq
    292   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
    293   ret <8 x i16> %shuffle
    294 }
    295 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
        ; Result is <7,5,6,4> followed by <3,1,2,0>: the two 64-bit halves of %a
        ; trade places and each half is permuted. The SSE2 run expects pshufd
        ; [2,3,0,1] to exchange the halves, then pshuflw and pshufhw to permute
        ; within each half. The SSSE3, SSE4.1 and AVX runs expect a single byte
        ; shuffle (pshufb / vpshufb).
    296 ; SSE2-LABEL: shuffle_v8i16_75643120:
    297 ; SSE2:       # %bb.0:
    298 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    299 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    300 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    301 ; SSE2-NEXT:    retq
    302 ;
    303 ; SSSE3-LABEL: shuffle_v8i16_75643120:
    304 ; SSSE3:       # %bb.0:
    305 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
    306 ; SSSE3-NEXT:    retq
    307 ;
    308 ; SSE41-LABEL: shuffle_v8i16_75643120:
    309 ; SSE41:       # %bb.0:
    310 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
    311 ; SSE41-NEXT:    retq
    312 ;
    313 ; AVX-LABEL: shuffle_v8i16_75643120:
    314 ; AVX:       # %bb.0:
    315 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
    316 ; AVX-NEXT:    retq
    317   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
    318   ret <8 x i16> %shuffle
    319 }
    320 
    321 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <1,0,5,4,5,4,1,0> reads only lanes 0, 1, 4 and 5 of %a. The SSE2
        ; run expects pshufd [0,2,2,0] to place dwords 0 and 2 in both halves,
        ; then pshuflw and pshufhw to order the lanes within each half. The
        ; SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    322 ; SSE2-LABEL: shuffle_v8i16_10545410:
    323 ; SSE2:       # %bb.0:
    324 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    325 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
    326 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
    327 ; SSE2-NEXT:    retq
    328 ;
    329 ; SSSE3-LABEL: shuffle_v8i16_10545410:
    330 ; SSSE3:       # %bb.0:
    331 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
    332 ; SSSE3-NEXT:    retq
    333 ;
    334 ; SSE41-LABEL: shuffle_v8i16_10545410:
    335 ; SSE41:       # %bb.0:
    336 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
    337 ; SSE41-NEXT:    retq
    338 ;
    339 ; AVX-LABEL: shuffle_v8i16_10545410:
    340 ; AVX:       # %bb.0:
    341 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
    342 ; AVX-NEXT:    retq
    343   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
    344   ret <8 x i16> %shuffle
    345 }
    346 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
        ; Both halves of the result are <5,4,1,0>, so only lanes 0, 1, 4 and 5 of
        ; %a are read. The SSE2 run expects pshufd [0,2,2,0] to place dwords 0
        ; and 2 in both halves, then pshuflw and pshufhw to order the lanes
        ; within each half. The SSSE3, SSE4.1 and AVX runs expect a single byte
        ; shuffle (pshufb / vpshufb).
    347 ; SSE2-LABEL: shuffle_v8i16_54105410:
    348 ; SSE2:       # %bb.0:
    349 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    350 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    351 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
    352 ; SSE2-NEXT:    retq
    353 ;
    354 ; SSSE3-LABEL: shuffle_v8i16_54105410:
    355 ; SSSE3:       # %bb.0:
    356 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
    357 ; SSSE3-NEXT:    retq
    358 ;
    359 ; SSE41-LABEL: shuffle_v8i16_54105410:
    360 ; SSE41:       # %bb.0:
    361 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
    362 ; SSE41-NEXT:    retq
    363 ;
    364 ; AVX-LABEL: shuffle_v8i16_54105410:
    365 ; AVX:       # %bb.0:
    366 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
    367 ; AVX-NEXT:    retq
    368   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
    369   ret <8 x i16> %shuffle
    370 }
    371 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
        ; Low half of the result is <5,4,1,0> and the high half is <1,0,5,4>, so
        ; only lanes 0, 1, 4 and 5 of %a are read. The SSE2 run expects pshufd
        ; [0,2,2,0] to place dwords 0 and 2 in both halves, then pshuflw and
        ; pshufhw to order the lanes within each half. The SSSE3, SSE4.1 and AVX
        ; runs expect a single byte shuffle (pshufb / vpshufb).
    372 ; SSE2-LABEL: shuffle_v8i16_54101054:
    373 ; SSE2:       # %bb.0:
    374 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    375 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    376 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    377 ; SSE2-NEXT:    retq
    378 ;
    379 ; SSSE3-LABEL: shuffle_v8i16_54101054:
    380 ; SSSE3:       # %bb.0:
    381 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
    382 ; SSSE3-NEXT:    retq
    383 ;
    384 ; SSE41-LABEL: shuffle_v8i16_54101054:
    385 ; SSE41:       # %bb.0:
    386 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
    387 ; SSE41-NEXT:    retq
    388 ;
    389 ; AVX-LABEL: shuffle_v8i16_54101054:
    390 ; AVX:       # %bb.0:
    391 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
    392 ; AVX-NEXT:    retq
    393   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
    394   ret <8 x i16> %shuffle
    395 }
    396 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
        ; Both halves of the result are <0,4,4,0>, so only lanes 0 and 4 of %a
        ; are read. The SSE2 run expects pshufd [0,2,2,0] to place dwords 0 and 2
        ; in both halves, then pshuflw and pshufhw to pick the lanes within each
        ; half. The SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    397 ; SSE2-LABEL: shuffle_v8i16_04400440:
    398 ; SSE2:       # %bb.0:
    399 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    400 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    401 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
    402 ; SSE2-NEXT:    retq
    403 ;
    404 ; SSSE3-LABEL: shuffle_v8i16_04400440:
    405 ; SSSE3:       # %bb.0:
    406 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
    407 ; SSSE3-NEXT:    retq
    408 ;
    409 ; SSE41-LABEL: shuffle_v8i16_04400440:
    410 ; SSE41:       # %bb.0:
    411 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
    412 ; SSE41-NEXT:    retq
    413 ;
    414 ; AVX-LABEL: shuffle_v8i16_04400440:
    415 ; AVX:       # %bb.0:
    416 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
    417 ; AVX-NEXT:    retq
    418   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
    419   ret <8 x i16> %shuffle
    420 }
    421 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
        ; Both halves of the result are <4,0,0,4>, so only lanes 0 and 4 of %a
        ; are read. The SSE2 run expects pshufd [0,2,2,0] to place dwords 0 and 2
        ; in both halves, then pshuflw and pshufhw to pick the lanes within each
        ; half. The SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    422 ; SSE2-LABEL: shuffle_v8i16_40044004:
    423 ; SSE2:       # %bb.0:
    424 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    425 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
    426 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
    427 ; SSE2-NEXT:    retq
    428 ;
    429 ; SSSE3-LABEL: shuffle_v8i16_40044004:
    430 ; SSSE3:       # %bb.0:
    431 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
    432 ; SSSE3-NEXT:    retq
    433 ;
    434 ; SSE41-LABEL: shuffle_v8i16_40044004:
    435 ; SSE41:       # %bb.0:
    436 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
    437 ; SSE41-NEXT:    retq
    438 ;
    439 ; AVX-LABEL: shuffle_v8i16_40044004:
    440 ; AVX:       # %bb.0:
    441 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
    442 ; AVX-NEXT:    retq
    443   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
    444   ret <8 x i16> %shuffle
    445 }
    446 
    447 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <2,6,4,0,5,1,7,3> uses each lane of %a exactly once and draws both
        ; result halves from both source halves. The SSE2 run expects a
        ; five-instruction chain (pshuflw, pshufhw, pshufd, pshuflw, pshufhw);
        ; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    448 ; SSE2-LABEL: shuffle_v8i16_26405173:
    449 ; SSE2:       # %bb.0:
    450 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
    451 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    452 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
    453 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
    454 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
    455 ; SSE2-NEXT:    retq
    456 ;
    457 ; SSSE3-LABEL: shuffle_v8i16_26405173:
    458 ; SSSE3:       # %bb.0:
    459 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
    460 ; SSSE3-NEXT:    retq
    461 ;
    462 ; SSE41-LABEL: shuffle_v8i16_26405173:
    463 ; SSE41:       # %bb.0:
    464 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
    465 ; SSE41-NEXT:    retq
    466 ;
    467 ; AVX-LABEL: shuffle_v8i16_26405173:
    468 ; AVX:       # %bb.0:
    469 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
    470 ; AVX-NEXT:    retq
    471   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
    472   ret <8 x i16> %shuffle
    473 }
    474 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <2,0,6,4,5,1,7,3> uses each lane of %a exactly once and draws both
        ; result halves from both source halves. The SSE2 run expects a
        ; five-instruction chain (pshuflw, pshufhw, pshufd, pshuflw, pshufhw);
        ; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    475 ; SSE2-LABEL: shuffle_v8i16_20645173:
    476 ; SSE2:       # %bb.0:
    477 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
    478 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    479 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
    480 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
    481 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
    482 ; SSE2-NEXT:    retq
    483 ;
    484 ; SSSE3-LABEL: shuffle_v8i16_20645173:
    485 ; SSSE3:       # %bb.0:
    486 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
    487 ; SSSE3-NEXT:    retq
    488 ;
    489 ; SSE41-LABEL: shuffle_v8i16_20645173:
    490 ; SSE41:       # %bb.0:
    491 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
    492 ; SSE41-NEXT:    retq
    493 ;
    494 ; AVX-LABEL: shuffle_v8i16_20645173:
    495 ; AVX:       # %bb.0:
    496 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
    497 ; AVX-NEXT:    retq
    498   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
    499   ret <8 x i16> %shuffle
    500 }
    501 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <2,6,4,0,1,3,7,5> uses each lane of %a exactly once and draws both
        ; result halves from both source halves. The SSE2 run expects a
        ; four-instruction chain (pshuflw, pshufhw, pshufd, pshuflw) with no
        ; trailing pshufhw; the SSSE3, SSE4.1 and AVX runs expect a single byte
        ; shuffle (pshufb / vpshufb).
    502 ; SSE2-LABEL: shuffle_v8i16_26401375:
    503 ; SSE2:       # %bb.0:
    504 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
    505 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    506 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
    507 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
    508 ; SSE2-NEXT:    retq
    509 ;
    510 ; SSSE3-LABEL: shuffle_v8i16_26401375:
    511 ; SSSE3:       # %bb.0:
    512 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
    513 ; SSSE3-NEXT:    retq
    514 ;
    515 ; SSE41-LABEL: shuffle_v8i16_26401375:
    516 ; SSE41:       # %bb.0:
    517 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
    518 ; SSE41-NEXT:    retq
    519 ;
    520 ; AVX-LABEL: shuffle_v8i16_26401375:
    521 ; AVX:       # %bb.0:
    522 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
    523 ; AVX-NEXT:    retq
    524   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
    525   ret <8 x i16> %shuffle
    526 }
    527 
    528 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <6,6,7,5,1,6,4,3> repeats lane 6 of %a three times and never reads
        ; lanes 0 or 2. The SSE2 run expects a five-instruction chain (pshuflw,
        ; pshufhw, pshufd, pshuflw, pshufhw); the SSSE3, SSE4.1 and AVX runs
        ; expect a single byte shuffle (pshufb / vpshufb).
    529 ; SSE2-LABEL: shuffle_v8i16_66751643:
    530 ; SSE2:       # %bb.0:
    531 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
    532 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
    533 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
    534 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
    535 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
    536 ; SSE2-NEXT:    retq
    537 ;
    538 ; SSSE3-LABEL: shuffle_v8i16_66751643:
    539 ; SSSE3:       # %bb.0:
    540 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
    541 ; SSSE3-NEXT:    retq
    542 ;
    543 ; SSE41-LABEL: shuffle_v8i16_66751643:
    544 ; SSE41:       # %bb.0:
    545 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
    546 ; SSE41-NEXT:    retq
    547 ;
    548 ; AVX-LABEL: shuffle_v8i16_66751643:
    549 ; AVX:       # %bb.0:
    550 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
    551 ; AVX-NEXT:    retq
    552   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
    553   ret <8 x i16> %shuffle
    554 }
    555 
    556 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
        ; Mask <6,0,5,1,4,7,5,4> repeats lanes 4 and 5 of %a and never reads
        ; lanes 2 or 3. The second shufflevector operand in this function is
        ; undef rather than %b, so the %b parameter is unused. The SSE2 run
        ; expects a four-instruction chain (pshufhw, pshufd, pshuflw, pshufhw);
        ; the SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    557 ; SSE2-LABEL: shuffle_v8i16_60514754:
    558 ; SSE2:       # %bb.0:
    559 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    560 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    561 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
    562 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
    563 ; SSE2-NEXT:    retq
    564 ;
    565 ; SSSE3-LABEL: shuffle_v8i16_60514754:
    566 ; SSSE3:       # %bb.0:
    567 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
    568 ; SSSE3-NEXT:    retq
    569 ;
    570 ; SSE41-LABEL: shuffle_v8i16_60514754:
    571 ; SSE41:       # %bb.0:
    572 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
    573 ; SSE41-NEXT:    retq
    574 ;
    575 ; AVX-LABEL: shuffle_v8i16_60514754:
    576 ; AVX:       # %bb.0:
    577 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
    578 ; AVX-NEXT:    retq
    579   %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
    580   ret <8 x i16> %shuffle
    581 }
    582 
    583 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
        ; Result holds lane 0 of %a in its first two lanes and lane 4 in the
        ; remaining six. The SSE2 run expects pshufd [0,2,2,3] to copy dword 2
        ; beside dword 0 in the low half, then pshuflw and pshufhw to duplicate
        ; the lanes. The SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    584 ; SSE2-LABEL: shuffle_v8i16_00444444:
    585 ; SSE2:       # %bb.0:
    586 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    587 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
    588 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    589 ; SSE2-NEXT:    retq
    590 ;
    591 ; SSSE3-LABEL: shuffle_v8i16_00444444:
    592 ; SSSE3:       # %bb.0:
    593 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
    594 ; SSSE3-NEXT:    retq
    595 ;
    596 ; SSE41-LABEL: shuffle_v8i16_00444444:
    597 ; SSE41:       # %bb.0:
    598 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
    599 ; SSE41-NEXT:    retq
    600 ;
    601 ; AVX-LABEL: shuffle_v8i16_00444444:
    602 ; AVX:       # %bb.0:
    603 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
    604 ; AVX-NEXT:    retq
    605   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    606   ret <8 x i16> %shuffle
    607 }
    608 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
        ; Result holds lane 0 of %a in result lanes 2 and 3 and lane 4 of %a in
        ; the other six. The SSE2 run expects pshufd [0,2,2,3] to copy dword 2
        ; beside dword 0 in the low half, then pshuflw and pshufhw to duplicate
        ; the lanes. The SSSE3, SSE4.1 and AVX runs expect a single byte shuffle
        ; (pshufb / vpshufb).
    609 ; SSE2-LABEL: shuffle_v8i16_44004444:
    610 ; SSE2:       # %bb.0:
    611 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    612 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
    613 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    614 ; SSE2-NEXT:    retq
    615 ;
    616 ; SSSE3-LABEL: shuffle_v8i16_44004444:
    617 ; SSSE3:       # %bb.0:
    618 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
    619 ; SSSE3-NEXT:    retq
    620 ;
    621 ; SSE41-LABEL: shuffle_v8i16_44004444:
    622 ; SSE41:       # %bb.0:
    623 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
    624 ; SSE41-NEXT:    retq
    625 ;
    626 ; AVX-LABEL: shuffle_v8i16_44004444:
    627 ; AVX:       # %bb.0:
    628 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
    629 ; AVX-NEXT:    retq
    630   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    631   ret <8 x i16> %shuffle
    632 }
    633 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
        ; Low half of the result is <0,4,4,0> and the high half is lane 4 of %a
        ; splatted, so only lanes 0 and 4 are read. The SSE2 run expects pshufd
        ; [0,2,2,3] to copy dword 2 beside dword 0 in the low half, then pshuflw
        ; and pshufhw to pick the lanes. The SSSE3, SSE4.1 and AVX runs expect a
        ; single byte shuffle (pshufb / vpshufb).
    634 ; SSE2-LABEL: shuffle_v8i16_04404444:
    635 ; SSE2:       # %bb.0:
    636 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    637 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    638 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    639 ; SSE2-NEXT:    retq
    640 ;
    641 ; SSSE3-LABEL: shuffle_v8i16_04404444:
    642 ; SSSE3:       # %bb.0:
    643 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    644 ; SSSE3-NEXT:    retq
    645 ;
    646 ; SSE41-LABEL: shuffle_v8i16_04404444:
    647 ; SSE41:       # %bb.0:
    648 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    649 ; SSE41-NEXT:    retq
    650 ;
    651 ; AVX-LABEL: shuffle_v8i16_04404444:
    652 ; AVX:       # %bb.0:
    653 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    654 ; AVX-NEXT:    retq
    655   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
    656   ret <8 x i16> %shuffle
    657 }
    ; Single-input shuffle, every mask index is below 8 so only %a is selected from.
    ; Result lanes are a[0],a[4],a[4],a[0],a[0],a[0],a[0],a[0]. The SSE2 run is checked for a
    ; pshufd, pshuflw, pshufhw chain, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    658 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
    659 ; SSE2-LABEL: shuffle_v8i16_04400000:
    660 ; SSE2:       # %bb.0:
    661 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
    662 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    663 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    664 ; SSE2-NEXT:    retq
    665 ;
    666 ; SSSE3-LABEL: shuffle_v8i16_04400000:
    667 ; SSSE3:       # %bb.0:
    668 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
    669 ; SSSE3-NEXT:    retq
    670 ;
    671 ; SSE41-LABEL: shuffle_v8i16_04400000:
    672 ; SSE41:       # %bb.0:
    673 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
    674 ; SSE41-NEXT:    retq
    675 ;
    676 ; AVX-LABEL: shuffle_v8i16_04400000:
    677 ; AVX:       # %bb.0:
    678 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
    679 ; AVX-NEXT:    retq
    680   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
    681   ret <8 x i16> %shuffle
    682 }
    ; Single-input shuffle of %a that rewrites only the low four lanes to a[0],a[4],a[4],a[0]
    ; and keeps lanes 4 to 7 in place. The runs built with +fast-variable-shuffle are checked
    ; for one vpshufb, every other run for a pshufd followed by a pshuflw.
    683 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
    684 ; SSE-LABEL: shuffle_v8i16_04404567:
    685 ; SSE:       # %bb.0:
    686 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    687 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    688 ; SSE-NEXT:    retq
    689 ;
    690 ; AVX1-LABEL: shuffle_v8i16_04404567:
    691 ; AVX1:       # %bb.0:
    692 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    693 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    694 ; AVX1-NEXT:    retq
    695 ;
    696 ; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
    697 ; AVX2-SLOW:       # %bb.0:
    698 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    699 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    700 ; AVX2-SLOW-NEXT:    retq
    701 ;
    702 ; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
    703 ; AVX2-FAST:       # %bb.0:
    704 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
    705 ; AVX2-FAST-NEXT:    retq
    706 ;
    707 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
    708 ; AVX512VL-SLOW:       # %bb.0:
    709 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    710 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    711 ; AVX512VL-SLOW-NEXT:    retq
    712 ;
    713 ; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
    714 ; AVX512VL-FAST:       # %bb.0:
    715 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
    716 ; AVX512VL-FAST-NEXT:    retq
    717   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
    718   ret <8 x i16> %shuffle
    719 }
    720 
    ; Single-input shuffle of %a with an undef lane (the X in the name is mask element 1).
    ; Defined result lanes are a[0] in lane 0 and a[4] in lanes 2 to 7. The SSE2 run is checked
    ; for a pshufd, pshuflw, pshufhw chain, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    721 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
    722 ; SSE2-LABEL: shuffle_v8i16_0X444444:
    723 ; SSE2:       # %bb.0:
    724 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    725 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
    726 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    727 ; SSE2-NEXT:    retq
    728 ;
    729 ; SSSE3-LABEL: shuffle_v8i16_0X444444:
    730 ; SSSE3:       # %bb.0:
    731 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
    732 ; SSSE3-NEXT:    retq
    733 ;
    734 ; SSE41-LABEL: shuffle_v8i16_0X444444:
    735 ; SSE41:       # %bb.0:
    736 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
    737 ; SSE41-NEXT:    retq
    738 ;
    739 ; AVX-LABEL: shuffle_v8i16_0X444444:
    740 ; AVX:       # %bb.0:
    741 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
    742 ; AVX-NEXT:    retq
    743   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    744   ret <8 x i16> %shuffle
    745 }
    ; Single-input shuffle of %a with an undef lane (the X in the name is mask element 2).
    ; Defined result lanes are a[4],a[4],_,a[0],a[4],a[4],a[4],a[4]. The SSE2 run is checked
    ; for a pshufd, pshuflw, pshufhw chain, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    746 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
    747 ; SSE2-LABEL: shuffle_v8i16_44X04444:
    748 ; SSE2:       # %bb.0:
    749 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    750 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
    751 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    752 ; SSE2-NEXT:    retq
    753 ;
    754 ; SSSE3-LABEL: shuffle_v8i16_44X04444:
    755 ; SSSE3:       # %bb.0:
    756 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    757 ; SSSE3-NEXT:    retq
    758 ;
    759 ; SSE41-LABEL: shuffle_v8i16_44X04444:
    760 ; SSE41:       # %bb.0:
    761 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    762 ; SSE41-NEXT:    retq
    763 ;
    764 ; AVX-LABEL: shuffle_v8i16_44X04444:
    765 ; AVX:       # %bb.0:
    766 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    767 ; AVX-NEXT:    retq
    768   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
    769   ret <8 x i16> %shuffle
    770 }
    ; Single-input shuffle of %a with an undef lane (the X in the name is mask element 0).
    ; Defined result lanes are _,a[4],a[4],a[0],a[4],a[4],a[4],a[4]. The SSE2 run is checked
    ; for a pshufd, pshuflw, pshufhw chain, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    771 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
    772 ; SSE2-LABEL: shuffle_v8i16_X4404444:
    773 ; SSE2:       # %bb.0:
    774 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    775 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    776 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    777 ; SSE2-NEXT:    retq
    778 ;
    779 ; SSSE3-LABEL: shuffle_v8i16_X4404444:
    780 ; SSSE3:       # %bb.0:
    781 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    782 ; SSSE3-NEXT:    retq
    783 ;
    784 ; SSE41-LABEL: shuffle_v8i16_X4404444:
    785 ; SSE41:       # %bb.0:
    786 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    787 ; SSE41-NEXT:    retq
    788 ;
    789 ; AVX-LABEL: shuffle_v8i16_X4404444:
    790 ; AVX:       # %bb.0:
    791 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    792 ; AVX-NEXT:    retq
    793   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
    794   ret <8 x i16> %shuffle
    795 }
    796 
    ; Single-input shuffle of %a where only the low four lanes are defined, as
    ; a[0],a[1],a[2],a[7]. Mask elements 4 to 7 are undef. The SSE2 run is checked for a
    ; pshufd, pshufhw, pshufd chain, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    797 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
    798 ; SSE2-LABEL: shuffle_v8i16_0127XXXX:
    799 ; SSE2:       # %bb.0:
    800 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    801 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
    802 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    803 ; SSE2-NEXT:    retq
    804 ;
    805 ; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
    806 ; SSSE3:       # %bb.0:
    807 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
    808 ; SSSE3-NEXT:    retq
    809 ;
    810 ; SSE41-LABEL: shuffle_v8i16_0127XXXX:
    811 ; SSE41:       # %bb.0:
    812 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
    813 ; SSE41-NEXT:    retq
    814 ;
    815 ; AVX-LABEL: shuffle_v8i16_0127XXXX:
    816 ; AVX:       # %bb.0:
    817 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
    818 ; AVX-NEXT:    retq
    819   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    820   ret <8 x i16> %shuffle
    821 }
    822 
    ; Single-input shuffle of %a where only the high four lanes are defined, as
    ; a[4],a[5],a[6],a[3]. Mask elements 0 to 3 are undef. The SSE2 run is checked for a
    ; pshufd, pshuflw, pshufd chain, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    823 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
    824 ; SSE2-LABEL: shuffle_v8i16_XXXX4563:
    825 ; SSE2:       # %bb.0:
    826 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    827 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
    828 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
    829 ; SSE2-NEXT:    retq
    830 ;
    831 ; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
    832 ; SSSE3:       # %bb.0:
    833 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
    834 ; SSSE3-NEXT:    retq
    835 ;
    836 ; SSE41-LABEL: shuffle_v8i16_XXXX4563:
    837 ; SSE41:       # %bb.0:
    838 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
    839 ; SSE41-NEXT:    retq
    840 ;
    841 ; AVX-LABEL: shuffle_v8i16_XXXX4563:
    842 ; AVX:       # %bb.0:
    843 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
    844 ; AVX-NEXT:    retq
    845   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
    846   ret <8 x i16> %shuffle
    847 }
    848 
    ; Single-input shuffle of %a where only the low four lanes are defined, as
    ; a[4],a[5],a[6],a[3]. Mask elements 4 to 7 are undef. The SSE2 run is checked for a
    ; pshufd, pshuflw, pshufd chain, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    849 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
    850 ; SSE2-LABEL: shuffle_v8i16_4563XXXX:
    851 ; SSE2:       # %bb.0:
    852 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    853 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
    854 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
    855 ; SSE2-NEXT:    retq
    856 ;
    857 ; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
    858 ; SSSE3:       # %bb.0:
    859 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
    860 ; SSSE3-NEXT:    retq
    861 ;
    862 ; SSE41-LABEL: shuffle_v8i16_4563XXXX:
    863 ; SSE41:       # %bb.0:
    864 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
    865 ; SSE41-NEXT:    retq
    866 ;
    867 ; AVX-LABEL: shuffle_v8i16_4563XXXX:
    868 ; AVX:       # %bb.0:
    869 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
    870 ; AVX-NEXT:    retq
    871   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    872   ret <8 x i16> %shuffle
    873 }
    874 
    ; Single-input shuffle of %a that exchanges lanes 3 and 7 and leaves the other six lanes
    ; in place. The SSE2 run is checked for a pshufd, pshufhw, pshufd chain, the SSSE3, SSE4.1
    ; and AVX runs for one byte shuffle.
    875 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
    876 ; SSE2-LABEL: shuffle_v8i16_01274563:
    877 ; SSE2:       # %bb.0:
    878 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    879 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    880 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
    881 ; SSE2-NEXT:    retq
    882 ;
    883 ; SSSE3-LABEL: shuffle_v8i16_01274563:
    884 ; SSSE3:       # %bb.0:
    885 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
    886 ; SSSE3-NEXT:    retq
    887 ;
    888 ; SSE41-LABEL: shuffle_v8i16_01274563:
    889 ; SSE41:       # %bb.0:
    890 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
    891 ; SSE41-NEXT:    retq
    892 ;
    893 ; AVX-LABEL: shuffle_v8i16_01274563:
    894 ; AVX:       # %bb.0:
    895 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
    896 ; AVX-NEXT:    retq
    897   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
    898   ret <8 x i16> %shuffle
    899 }
    900 
    ; Single-input shuffle of %a producing a[4],a[5],a[6],a[3],a[0],a[1],a[2],a[7].
    ; The SSE2 run is checked for a pshufd, pshuflw, pshufd chain, the SSSE3, SSE4.1 and AVX
    ; runs for one byte shuffle.
    901 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
    902 ; SSE2-LABEL: shuffle_v8i16_45630127:
    903 ; SSE2:       # %bb.0:
    904 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    905 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
    906 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
    907 ; SSE2-NEXT:    retq
    908 ;
    909 ; SSSE3-LABEL: shuffle_v8i16_45630127:
    910 ; SSSE3:       # %bb.0:
    911 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
    912 ; SSSE3-NEXT:    retq
    913 ;
    914 ; SSE41-LABEL: shuffle_v8i16_45630127:
    915 ; SSE41:       # %bb.0:
    916 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
    917 ; SSE41-NEXT:    retq
    918 ;
    919 ; AVX-LABEL: shuffle_v8i16_45630127:
    920 ; AVX:       # %bb.0:
    921 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
    922 ; AVX-NEXT:    retq
    923   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
    924   ret <8 x i16> %shuffle
    925 }
    926 
    ; Single-input shuffle of %a with repeated source lanes, producing
    ; a[3],a[7],a[1],a[0],a[2],a[7],a[3],a[5]. The SSE2 run is checked for a chain of six
    ; pshufhw, pshufd and pshuflw steps, the SSSE3, SSE4.1 and AVX runs for one byte shuffle.
    927 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
    928 ; SSE2-LABEL: shuffle_v8i16_37102735:
    929 ; SSE2:       # %bb.0:
    930 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
    931 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    932 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    933 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    934 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    935 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
    936 ; SSE2-NEXT:    retq
    937 ;
    938 ; SSSE3-LABEL: shuffle_v8i16_37102735:
    939 ; SSSE3:       # %bb.0:
    940 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
    941 ; SSSE3-NEXT:    retq
    942 ;
    943 ; SSE41-LABEL: shuffle_v8i16_37102735:
    944 ; SSE41:       # %bb.0:
    945 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
    946 ; SSE41-NEXT:    retq
    947 ;
    948 ; AVX-LABEL: shuffle_v8i16_37102735:
    949 ; AVX:       # %bb.0:
    950 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
    951 ; AVX-NEXT:    retq
    952   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
    953   ret <8 x i16> %shuffle
    954 }
    955 
    ; Two-input shuffle (mask indices 8 to 15 select from %b) that interleaves the low four
    ; lanes of %a and %b as a[0],b[0],a[1],b[1],a[2],b[2],a[3],b[3]. Every run is checked
    ; for a single low word unpack.
    956 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
    957 ; SSE-LABEL: shuffle_v8i16_08192a3b:
    958 ; SSE:       # %bb.0:
    959 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    960 ; SSE-NEXT:    retq
    961 ;
    962 ; AVX-LABEL: shuffle_v8i16_08192a3b:
    963 ; AVX:       # %bb.0:
    964 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    965 ; AVX-NEXT:    retq
    966   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    967   ret <8 x i16> %shuffle
    968 }
    969 
    ; Two-input shuffle that interleaves the low four lanes of %a with the high four lanes
    ; of %b as a[0],b[4],a[1],b[5],a[2],b[6],a[3],b[7]. The checked code first swaps the
    ; 64-bit halves of xmm1 with a dword shuffle and then does a low word unpack.
    970 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
    971 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
    972 ; SSE:       # %bb.0:
    973 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
    974 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    975 ; SSE-NEXT:    retq
    976 ;
    977 ; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
    978 ; AVX:       # %bb.0:
    979 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
    980 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    981 ; AVX-NEXT:    retq
    982   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
    983   ret <8 x i16> %shuffle
    984 }
    985 
    ; Two-input shuffle that interleaves the high four lanes of %a and %b as
    ; a[4],b[4],a[5],b[5],a[6],b[6],a[7],b[7]. Every run is checked for a single high
    ; word unpack.
    986 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
    987 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
    988 ; SSE:       # %bb.0:
    989 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    990 ; SSE-NEXT:    retq
    991 ;
    992 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
    993 ; AVX:       # %bb.0:
    994 ; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    995 ; AVX-NEXT:    retq
    996   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    997   ret <8 x i16> %shuffle
    998 }
    999 
   ; Two-input shuffle that interleaves the high four lanes of %a with the low four lanes
   ; of %b as a[4],b[0],a[5],b[1],a[6],b[2],a[7],b[3]. The checked code first swaps the
   ; 64-bit halves of xmm0 with a dword shuffle and then does a low word unpack.
   1000 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
   1001 ; SSE-LABEL: shuffle_v8i16_48596a7b:
   1002 ; SSE:       # %bb.0:
   1003 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1004 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1005 ; SSE-NEXT:    retq
   1006 ;
   1007 ; AVX-LABEL: shuffle_v8i16_48596a7b:
   1008 ; AVX:       # %bb.0:
   1009 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1010 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1011 ; AVX-NEXT:    retq
   1012   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
   1013   ret <8 x i16> %shuffle
   1014 }
   1015 
   ; Two-input shuffle producing a[0],b[0],a[1],b[1],a[6],b[6],a[7],b[7], an interleave of
   ; lanes 0,1,6,7 of each input. The checked code gathers those lanes into the low half of
   ; each register with one dword shuffle per input and then does a low word unpack.
   1016 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
   1017 ; SSE-LABEL: shuffle_v8i16_08196e7f:
   1018 ; SSE:       # %bb.0:
   1019 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
   1020 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   1021 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1022 ; SSE-NEXT:    retq
   1023 ;
   1024 ; AVX-LABEL: shuffle_v8i16_08196e7f:
   1025 ; AVX:       # %bb.0:
   1026 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
   1027 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   1028 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1029 ; AVX-NEXT:    retq
   1030   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
   1031   ret <8 x i16> %shuffle
   1032 }
   1033 
   ; Two-input shuffle producing a[0],b[4],a[1],b[5],a[6],b[0],a[7],b[1]. The checked code
   ; uses one dword shuffle per input to bring the needed lanes into the low half of each
   ; register and then does a low word unpack.
   1034 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
   1035 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
   1036 ; SSE:       # %bb.0:
   1037 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
   1038 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   1039 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1040 ; SSE-NEXT:    retq
   1041 ;
   1042 ; AVX-LABEL: shuffle_v8i16_0c1d6879:
   1043 ; AVX:       # %bb.0:
   1044 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
   1045 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   1046 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1047 ; AVX-NEXT:    retq
   1048   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
   1049   ret <8 x i16> %shuffle
   1050 }
   1051 
   ; Two-input shuffle producing a[1],a[0],b[1],b[0],a[3],a[2],b[3],b[2]. Every run is
   ; checked for a low word unpack of %a and %b first. The result is then permuted with a
   ; pshuflw plus pshufhw pair, or with one vpshufb on the +fast-variable-shuffle runs.
   1052 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
   1053 ; SSE-LABEL: shuffle_v8i16_109832ba:
   1054 ; SSE:       # %bb.0:
   1055 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1056 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   1057 ; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   1058 ; SSE-NEXT:    retq
   1059 ;
   1060 ; AVX1-LABEL: shuffle_v8i16_109832ba:
   1061 ; AVX1:       # %bb.0:
   1062 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1063 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   1064 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   1065 ; AVX1-NEXT:    retq
   1066 ;
   1067 ; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
   1068 ; AVX2-SLOW:       # %bb.0:
   1069 ; AVX2-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1070 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   1071 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   1072 ; AVX2-SLOW-NEXT:    retq
   1073 ;
   1074 ; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
   1075 ; AVX2-FAST:       # %bb.0:
   1076 ; AVX2-FAST-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1077 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
   1078 ; AVX2-FAST-NEXT:    retq
   1079 ;
   1080 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
   1081 ; AVX512VL-SLOW:       # %bb.0:
   1082 ; AVX512VL-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1083 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   1084 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   1085 ; AVX512VL-SLOW-NEXT:    retq
   1086 ;
   1087 ; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
   1088 ; AVX512VL-FAST:       # %bb.0:
   1089 ; AVX512VL-FAST-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1090 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
   1091 ; AVX512VL-FAST-NEXT:    retq
   1092   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
   1093   ret <8 x i16> %shuffle
   1094 }
   1095 
   ; Two-input shuffle that interleaves the low four lanes with %b first, giving
   ; b[0],a[0],b[1],a[1],b[2],a[2],b[3],a[3]. The checked code is a low word unpack with the
   ; operands swapped. The SSE form unpacks into xmm1 and then copies xmm1 to xmm0.
   1096 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
   1097 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
   1098 ; SSE:       # %bb.0:
   1099 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   1100 ; SSE-NEXT:    movdqa %xmm1, %xmm0
   1101 ; SSE-NEXT:    retq
   1102 ;
   1103 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
   1104 ; AVX:       # %bb.0:
   1105 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   1106 ; AVX-NEXT:    retq
   1107   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
   1108   ret <8 x i16> %shuffle
   1109 }
   ; Two-input shuffle that interleaves the high four lanes with %b first, giving
   ; b[4],a[4],b[5],a[5],b[6],a[6],b[7],a[7]. The checked code is a high word unpack with the
   ; operands swapped. The SSE form unpacks into xmm1 and then copies xmm1 to xmm0.
   1110 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
   1111 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
   1112 ; SSE:       # %bb.0:
   1113 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   1114 ; SSE-NEXT:    movdqa %xmm1, %xmm0
   1115 ; SSE-NEXT:    retq
   1116 ;
   1117 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
   1118 ; AVX:       # %bb.0:
   1119 ; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   1120 ; AVX-NEXT:    retq
   1121   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
   1122   ret <8 x i16> %shuffle
   1123 }
   1124 
   ; Two-input shuffle whose low half is a[0],a[2],a[1],a[3] and whose high half is
   ; b[4],b[6],b[5],b[7]. Each run permutes %a and %b separately and then merges the two
   ; halves. The merge instruction differs per run (movsd, pblendw, vpblendd or vpunpcklqdq).
   1125 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
   1126 ; SSE2-LABEL: shuffle_v8i16_0213cedf:
   1127 ; SSE2:       # %bb.0:
   1128 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
   1129 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
   1130 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
   1131 ; SSE2-NEXT:    retq
   1132 ;
   1133 ; SSSE3-LABEL: shuffle_v8i16_0213cedf:
   1134 ; SSSE3:       # %bb.0:
   1135 ; SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
   1136 ; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
   1137 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
   1138 ; SSSE3-NEXT:    retq
   1139 ;
   1140 ; SSE41-LABEL: shuffle_v8i16_0213cedf:
   1141 ; SSE41:       # %bb.0:
   1142 ; SSE41-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
   1143 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1144 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1145 ; SSE41-NEXT:    retq
   1146 ;
   1147 ; AVX1-LABEL: shuffle_v8i16_0213cedf:
   1148 ; AVX1:       # %bb.0:
   1149 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
   1150 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1151 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1152 ; AVX1-NEXT:    retq
   1153 ;
   1154 ; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
   1155 ; AVX2-SLOW:       # %bb.0:
   1156 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
   1157 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1158 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1159 ; AVX2-SLOW-NEXT:    retq
   1160 ;
   1161 ; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
   1162 ; AVX2-FAST:       # %bb.0:
   1163 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
   1164 ; AVX2-FAST-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1165 ; AVX2-FAST-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1166 ; AVX2-FAST-NEXT:    retq
   1167 ;
   1168 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
   1169 ; AVX512VL-SLOW:       # %bb.0:
   1170 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1171 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
   1172 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
   1173 ; AVX512VL-SLOW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1174 ; AVX512VL-SLOW-NEXT:    retq
   1175 ;
   1176 ; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
   1177 ; AVX512VL-FAST:       # %bb.0:
   1178 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
   1179 ; AVX512VL-FAST-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1180 ; AVX512VL-FAST-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1181 ; AVX512VL-FAST-NEXT:    retq
   1182   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
   1183   ret <8 x i16> %shuffle
   1184 }
   1185 
   ; Two-input shuffle where only the low four lanes are defined, as a[4],a[4],a[3],b[2].
   ; Mask elements 4 to 7 are undef. SSE2 blends b[2] in with a constant and/andn/or mask,
   ; SSSE3 ORs two zeroing byte shuffles, and the remaining runs blend then permute.
   1186 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
   1187 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
   1188 ; SSE2:       # %bb.0:
   1189 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
   1190 ; SSE2-NEXT:    pand %xmm2, %xmm0
   1191 ; SSE2-NEXT:    pandn %xmm1, %xmm2
   1192 ; SSE2-NEXT:    por %xmm0, %xmm2
   1193 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
   1194 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1195 ; SSE2-NEXT:    retq
   1196 ;
   1197 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
   1198 ; SSSE3:       # %bb.0:
   1199 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
   1200 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
   1201 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1202 ; SSSE3-NEXT:    retq
   1203 ;
   1204 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
   1205 ; SSE41:       # %bb.0:
   1206 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1207 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1208 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1209 ; SSE41-NEXT:    retq
   1210 ;
   1211 ; AVX1-LABEL: shuffle_v8i16_443aXXXX:
   1212 ; AVX1:       # %bb.0:
   1213 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1214 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1215 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1216 ; AVX1-NEXT:    retq
   1217 ;
   1218 ; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
   1219 ; AVX2-SLOW:       # %bb.0:
   1220 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1221 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1222 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1223 ; AVX2-SLOW-NEXT:    retq
   1224 ;
   1225 ; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
   1226 ; AVX2-FAST:       # %bb.0:
   1227 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1228 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
   1229 ; AVX2-FAST-NEXT:    retq
   1230 ;
   1231 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
   1232 ; AVX512VL-SLOW:       # %bb.0:
   1233 ; AVX512VL-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1234 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1235 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1236 ; AVX512VL-SLOW-NEXT:    retq
   1237 ;
   1238 ; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
   1239 ; AVX512VL-FAST:       # %bb.0:
   1240 ; AVX512VL-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1241 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
   1242 ; AVX512VL-FAST-NEXT:    retq
   1243   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
   1244   ret <8 x i16> %shuffle
   1245 }
   1246 
   ; Two-input shuffle where only the low four lanes are defined, as a[0],a[3],a[2],b[5].
   ; Mask elements 4 to 7 are undef. SSE2 merges the inputs with movsd and then permutes,
   ; SSSE3 ORs two zeroing byte shuffles, and the remaining runs blend then do one pshufb.
   1247 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
   1248 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
   1249 ; SSE2:       # %bb.0:
   1250 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1251 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,0]
   1252 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7]
   1253 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1254 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
   1255 ; SSE2-NEXT:    retq
   1256 ;
   1257 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
   1258 ; SSSE3:       # %bb.0:
   1259 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
   1260 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
   1261 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1262 ; SSSE3-NEXT:    retq
   1263 ;
   1264 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
   1265 ; SSE41:       # %bb.0:
   1266 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1267 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   1268 ; SSE41-NEXT:    retq
   1269 ;
   1270 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
   1271 ; AVX1:       # %bb.0:
   1272 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1273 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   1274 ; AVX1-NEXT:    retq
   1275 ;
   1276 ; AVX2OR512VL-LABEL: shuffle_v8i16_032dXXXX:
   1277 ; AVX2OR512VL:       # %bb.0:
   1278 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1279 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   1280 ; AVX2OR512VL-NEXT:    retq
   1281   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   1282   ret <8 x i16> %shuffle
   1283 }
   ; Shuffle with a single defined lane, where result lane 3 is b[5] (mask index 13) and
   ; every other mask element is undef. Every run is checked for one dword shuffle of xmm1
   ; (pshufd, or vpermilps on the AVX runs).
   1284 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
   1285 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
   1286 ; SSE:       # %bb.0:
   1287 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
   1288 ; SSE-NEXT:    retq
   1289 ;
   1290 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
   1291 ; AVX:       # %bb.0:
   1292 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
   1293 ; AVX-NEXT:    retq
   1294   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   1295   ret <8 x i16> %shuffle
   1296 }
   1297 
   ; Two-input shuffle where only the low four lanes are defined, as a[0],a[1],a[2],b[5].
   ; Mask elements 4 to 7 are undef. SSE2 moves b[5] into lane 3 with a pshufd and merges it
   ; through a constant and/andn/or mask. SSSE3 ORs two zeroing byte shuffles. The SSE4.1
   ; and AVX runs do the same pshufd followed by a word blend.
   1298 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
   1299 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
   1300 ; SSE2:       # %bb.0:
   1301 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
   1302 ; SSE2-NEXT:    pand %xmm2, %xmm0
   1303 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   1304 ; SSE2-NEXT:    pandn %xmm1, %xmm2
   1305 ; SSE2-NEXT:    por %xmm2, %xmm0
   1306 ; SSE2-NEXT:    retq
   1307 ;
   1308 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
   1309 ; SSSE3:       # %bb.0:
   1310 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
   1311 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
   1312 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1313 ; SSSE3-NEXT:    retq
   1314 ;
   1315 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
   1316 ; SSE41:       # %bb.0:
   1317 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   1318 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   1319 ; SSE41-NEXT:    retq
   1320 ;
   1321 ; AVX-LABEL: shuffle_v8i16_012dXXXX:
   1322 ; AVX:       # %bb.0:
   1323 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   1324 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   1325 ; AVX-NEXT:    retq
   1326   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   1327   ret <8 x i16> %shuffle
   1328 }
   1329 
   1330 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
        ; The low four result lanes are undef; lanes 4-6 are lanes 4-6 of %b
        ; (mask indices 12-14) and lane 7 is lane 3 of %a. With blend support
        ; the expected code moves the low half of xmm0 into its high half
        ; (pshufd, or vpbroadcastq on AVX2 and AVX512VL) and then blends word 7
        ; into xmm1.
   1331 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
   1332 ; SSE2:       # %bb.0:
   1333 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
   1334 ; SSE2-NEXT:    pand %xmm2, %xmm1
   1335 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1336 ; SSE2-NEXT:    pandn %xmm0, %xmm2
   1337 ; SSE2-NEXT:    por %xmm1, %xmm2
   1338 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
   1339 ; SSE2-NEXT:    retq
   1340 ;
   1341 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
   1342 ; SSSE3:       # %bb.0:
   1343 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
   1344 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
   1345 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1346 ; SSSE3-NEXT:    retq
   1347 ;
   1348 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
   1349 ; SSE41:       # %bb.0:
   1350 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1351 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   1352 ; SSE41-NEXT:    retq
   1353 ;
   1354 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
   1355 ; AVX1:       # %bb.0:
   1356 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1357 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   1358 ; AVX1-NEXT:    retq
   1359 ;
   1360 ; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
   1361 ; AVX2OR512VL:       # %bb.0:
   1362 ; AVX2OR512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
   1363 ; AVX2OR512VL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   1364 ; AVX2OR512VL-NEXT:    retq
   1365   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
   1366   ret <8 x i16> %shuffle
   1367 }
   1368 
   1369 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Result lanes 0-2 are lanes 4-6 of %b (mask indices 12-14), lane 3 is
        ; lane 3 of %a, and the upper four lanes are undef. Where a word blend
        ; is available the expected code swaps the 64-bit halves of xmm1 and
        ; then blends in word 3 of xmm0.
   1370 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
   1371 ; SSE2:       # %bb.0:
   1372 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
   1373 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1374 ; SSE2-NEXT:    pand %xmm2, %xmm1
   1375 ; SSE2-NEXT:    pandn %xmm0, %xmm2
   1376 ; SSE2-NEXT:    por %xmm1, %xmm2
   1377 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
   1378 ; SSE2-NEXT:    retq
   1379 ;
   1380 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
   1381 ; SSSE3:       # %bb.0:
   1382 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
   1383 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
   1384 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1385 ; SSSE3-NEXT:    retq
   1386 ;
   1387 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
   1388 ; SSE41:       # %bb.0:
   1389 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1390 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   1391 ; SSE41-NEXT:    retq
   1392 ;
   1393 ; AVX-LABEL: shuffle_v8i16_cde3XXXX:
   1394 ; AVX:       # %bb.0:
   1395 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1396 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   1397 ; AVX-NEXT:    retq
   1398   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   1399   ret <8 x i16> %shuffle
   1400 }
   1401 
   1402 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
        ; Fully defined two-input mask. Lanes 0-2 are lanes 0-2 of %a, lane 3 is
        ; lane 5 of %b (index 13), lanes 4-6 are lanes 4-6 of %b (indices 12-14)
        ; and lane 7 is lane 3 of %a. From SSE4.1 onwards the expected code
        ; blends the low half of xmm0 with the high half of xmm1 and finishes
        ; with one pshufb; plain SSE2 needs a long chain of shuffles.
   1403 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
   1404 ; SSE2:       # %bb.0:
   1405 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1406 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3,2,1]
   1407 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7]
   1408 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
   1409 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
   1410 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
   1411 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
   1412 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
   1413 ; SSE2-NEXT:    retq
   1414 ;
   1415 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
   1416 ; SSSE3:       # %bb.0:
   1417 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
   1418 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
   1419 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1420 ; SSSE3-NEXT:    retq
   1421 ;
   1422 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
   1423 ; SSE41:       # %bb.0:
   1424 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1425 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   1426 ; SSE41-NEXT:    retq
   1427 ;
   1428 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
   1429 ; AVX1:       # %bb.0:
   1430 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1431 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   1432 ; AVX1-NEXT:    retq
   1433 ;
   1434 ; AVX2OR512VL-LABEL: shuffle_v8i16_012dcde3:
   1435 ; AVX2OR512VL:       # %bb.0:
   1436 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1437 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   1438 ; AVX2OR512VL-NEXT:    retq
   1439   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
   1440   ret <8 x i16> %shuffle
   1441 }
   1442 
   1443 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
        ; Pure blend, no lane changes position. Result lanes 1, 4, 5 and 6 come
        ; from the same lanes of %b (indices 9, 12, 13, 14) and lanes 0, 2, 3
        ; and 7 come from %a. Expected code is a single pblendw where available
        ; and a constant-mask andps/andnps/orps sequence otherwise.
   1444 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
   1445 ; SSE2:       # %bb.0:
   1446 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
   1447 ; SSE2-NEXT:    andps %xmm2, %xmm0
   1448 ; SSE2-NEXT:    andnps %xmm1, %xmm2
   1449 ; SSE2-NEXT:    orps %xmm2, %xmm0
   1450 ; SSE2-NEXT:    retq
   1451 ;
   1452 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
   1453 ; SSSE3:       # %bb.0:
   1454 ; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
   1455 ; SSSE3-NEXT:    andps %xmm2, %xmm0
   1456 ; SSSE3-NEXT:    andnps %xmm1, %xmm2
   1457 ; SSSE3-NEXT:    orps %xmm2, %xmm0
   1458 ; SSSE3-NEXT:    retq
   1459 ;
   1460 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
   1461 ; SSE41:       # %bb.0:
   1462 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   1463 ; SSE41-NEXT:    retq
   1464 ;
   1465 ; AVX-LABEL: shuffle_v8i16_0923cde7:
   1466 ; AVX:       # %bb.0:
   1467 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   1468 ; AVX-NEXT:    retq
   1469   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
   1470   ret <8 x i16> %shuffle
   1471 }
   1472 
   1473 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
        ; Result lanes 3, 5 and 6 are lanes 1, 5 and 7 of %a; lane 7 is lane 1
        ; of %b (mask index 9); lanes 0-2 and 4 are undef. The AVX2 and AVX512VL
        ; assertions are split into slow and fast variable-shuffle variants. The
        ; fast ones expect one vpshufb where the slow ones expect a
        ; vpshuflw + vpshufhw pair.
   1474 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
   1475 ; SSE2:       # %bb.0:
   1476 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
   1477 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
   1478 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1479 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1480 ; SSE2-NEXT:    pand %xmm1, %xmm0
   1481 ; SSE2-NEXT:    pandn %xmm2, %xmm1
   1482 ; SSE2-NEXT:    por %xmm0, %xmm1
   1483 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1484 ; SSE2-NEXT:    retq
   1485 ;
   1486 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
   1487 ; SSSE3:       # %bb.0:
   1488 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
   1489 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
   1490 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1491 ; SSSE3-NEXT:    retq
   1492 ;
   1493 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
   1494 ; SSE41:       # %bb.0:
   1495 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   1496 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1497 ; SSE41-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1498 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1499 ; SSE41-NEXT:    retq
   1500 ;
   1501 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
   1502 ; AVX1:       # %bb.0:
   1503 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   1504 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1505 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1506 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1507 ; AVX1-NEXT:    retq
   1508 ;
   1509 ; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
   1510 ; AVX2-SLOW:       # %bb.0:
   1511 ; AVX2-SLOW-NEXT:    vpbroadcastd %xmm1, %xmm1
   1512 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1513 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1514 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1515 ; AVX2-SLOW-NEXT:    retq
   1516 ;
   1517 ; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
   1518 ; AVX2-FAST:       # %bb.0:
   1519 ; AVX2-FAST-NEXT:    vpbroadcastd %xmm1, %xmm1
   1520 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
   1521 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1522 ; AVX2-FAST-NEXT:    retq
   1523 ;
   1524 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
   1525 ; AVX512VL-SLOW:       # %bb.0:
   1526 ; AVX512VL-SLOW-NEXT:    vpbroadcastd %xmm1, %xmm1
   1527 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1528 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1529 ; AVX512VL-SLOW-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1530 ; AVX512VL-SLOW-NEXT:    retq
   1531 ;
   1532 ; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
   1533 ; AVX512VL-FAST:       # %bb.0:
   1534 ; AVX512VL-FAST-NEXT:    vpbroadcastd %xmm1, %xmm1
   1535 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
   1536 ; AVX512VL-FAST-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1537 ; AVX512VL-FAST-NEXT:    retq
   1538   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
   1539   ret <8 x i16> %shuffle
   1540 }
   1541 
   1542 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
        ; Result lane 2 is lane 4 of %a; lanes 4, 5 and 6 are lanes 0, 2 and 4
        ; of %b (mask indices 8, 10, 12); lanes 0, 1, 3 and 7 are undef. From
        ; SSE4.1 onwards the expected code shuffles each input separately and
        ; then blends the low half of xmm0 with the high half of xmm1.
   1543 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
   1544 ; SSE2:       # %bb.0:
   1545 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
   1546 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   1547 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   1548 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3]
   1549 ; SSE2-NEXT:    retq
   1550 ;
   1551 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
   1552 ; SSSE3:       # %bb.0:
   1553 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
   1554 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
   1555 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1556 ; SSSE3-NEXT:    retq
   1557 ;
   1558 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
   1559 ; SSE41:       # %bb.0:
   1560 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   1561 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1562 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1563 ; SSE41-NEXT:    retq
   1564 ;
   1565 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
   1566 ; AVX1:       # %bb.0:
   1567 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   1568 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1569 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1570 ; AVX1-NEXT:    retq
   1571 ;
   1572 ; AVX2OR512VL-LABEL: shuffle_v8i16_XX4X8acX:
   1573 ; AVX2OR512VL:       # %bb.0:
   1574 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   1575 ; AVX2OR512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1576 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1577 ; AVX2OR512VL-NEXT:    retq
   1578   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
   1579   ret <8 x i16> %shuffle
   1580 }
   1581 
   1582 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
        ; The scalar %i is inserted into lane 0 of an otherwise undef vector %a.
        ; Mask index 8 places that lane in result lane 0, and lanes 1-7 are read
        ; from the zeroinitializer operand. Expected code is a zero-extending
        ; move of %di followed by movd (vmovd under AVX).
   1583 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
   1584 ; SSE:       # %bb.0:
   1585 ; SSE-NEXT:    movzwl %di, %eax
   1586 ; SSE-NEXT:    movd %eax, %xmm0
   1587 ; SSE-NEXT:    retq
   1588 ;
   1589 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
   1590 ; AVX:       # %bb.0:
   1591 ; AVX-NEXT:    movzwl %di, %eax
   1592 ; AVX-NEXT:    vmovd %eax, %xmm0
   1593 ; AVX-NEXT:    retq
   1594   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   1595   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1596   ret <8 x i16> %shuffle
   1597 }
   1598 
   1599 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
        ; Result lane 1 is %i (mask index 8 = lane 0 of %a). Every other lane
        ; reads the zeroinitializer operand; the particular indices used there
        ; (2, 3, 7, 6, 5, 4, 3) all select a zero element. Expected code zeroes
        ; xmm0 and inserts the scalar with pinsrw $1.
   1600 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
   1601 ; SSE:       # %bb.0:
   1602 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1603 ; SSE-NEXT:    pinsrw $1, %edi, %xmm0
   1604 ; SSE-NEXT:    retq
   1605 ;
   1606 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
   1607 ; AVX:       # %bb.0:
   1608 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1609 ; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
   1610 ; AVX-NEXT:    retq
   1611   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   1612   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
   1613   ret <8 x i16> %shuffle
   1614 }
   1615 
   1616 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
        ; Result lane 5 is %i (mask index 8 = lane 0 of %a); all other lanes use
        ; index 0, i.e. an element of the zeroinitializer operand. Expected code
        ; zeroes xmm0 and inserts the scalar with pinsrw $5.
   1617 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
   1618 ; SSE:       # %bb.0:
   1619 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1620 ; SSE-NEXT:    pinsrw $5, %edi, %xmm0
   1621 ; SSE-NEXT:    retq
   1622 ;
   1623 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
   1624 ; AVX:       # %bb.0:
   1625 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1626 ; AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
   1627 ; AVX-NEXT:    retq
   1628   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   1629   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
   1630   ret <8 x i16> %shuffle
   1631 }
   1632 
   1633 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
        ; Result lane 7 is %i (mask index 8 = lane 0 of %a); lanes 0, 3 and 6
        ; read the zeroinitializer operand; lanes 1, 2, 4 and 5 are undef.
        ; Expected code zeroes xmm0 and inserts the scalar with pinsrw $7.
   1634 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
   1635 ; SSE:       # %bb.0:
   1636 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1637 ; SSE-NEXT:    pinsrw $7, %edi, %xmm0
   1638 ; SSE-NEXT:    retq
   1639 ;
   1640 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
   1641 ; AVX:       # %bb.0:
   1642 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1643 ; AVX-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0
   1644 ; AVX-NEXT:    retq
   1645   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   1646   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
   1647   ret <8 x i16> %shuffle
   1648 }
   1649 
   1650 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
        ; Here %i is inserted into lane 3 of %a rather than lane 0. Mask index
        ; 11 (lane 3 of the second operand) moves it to result lane 2, and the
        ; remaining lanes read the zeroinitializer operand. Expected code zeroes
        ; xmm0 and inserts the scalar with pinsrw $2.
   1651 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
   1652 ; SSE:       # %bb.0:
   1653 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1654 ; SSE-NEXT:    pinsrw $2, %edi, %xmm0
   1655 ; SSE-NEXT:    retq
   1656 ;
   1657 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
   1658 ; AVX:       # %bb.0:
   1659 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1660 ; AVX-NEXT:    vpinsrw $2, %edi, %xmm0, %xmm0
   1661 ; AVX-NEXT:    retq
   1662   %a = insertelement <8 x i16> undef, i16 %i, i32 3
   1663   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
   1664   ret <8 x i16> %shuffle
   1665 }
   1666 
   1667 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
        ; Result lanes 0-2 are lanes 5-7 of %b (mask indices 13-15) and lanes
        ; 3-7 are lanes 0-4 of %a. Expected code is a single palignr from SSSE3
        ; onwards; plain SSE2 uses psrldq on xmm1, pslldq on xmm0 and a por.
   1668 ; SSE2-LABEL: shuffle_v8i16_def01234:
   1669 ; SSE2:       # %bb.0:
   1670 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1671 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1672 ; SSE2-NEXT:    por %xmm1, %xmm0
   1673 ; SSE2-NEXT:    retq
   1674 ;
   1675 ; SSSE3-LABEL: shuffle_v8i16_def01234:
   1676 ; SSSE3:       # %bb.0:
   1677 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1678 ; SSSE3-NEXT:    retq
   1679 ;
   1680 ; SSE41-LABEL: shuffle_v8i16_def01234:
   1681 ; SSE41:       # %bb.0:
   1682 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1683 ; SSE41-NEXT:    retq
   1684 ;
   1685 ; AVX-LABEL: shuffle_v8i16_def01234:
   1686 ; AVX:       # %bb.0:
   1687 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1688 ; AVX-NEXT:    retq
   1689   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
   1690   ret <8 x i16> %shuffle
   1691 }
   1692 
   1693 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
        ; Partially undef two-input mask. The defined lanes are lane 1 = lane 6
        ; of %b (index 14) and lanes 4-6 = lanes 1-3 of %a. The expected code
        ; moves the last three words of xmm1 in front of the first five words of
        ; xmm0 (palignr, or psrldq/pslldq/por on plain SSE2), which satisfies
        ; every defined lane.
   1694 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
   1695 ; SSE2:       # %bb.0:
   1696 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1697 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1698 ; SSE2-NEXT:    por %xmm1, %xmm0
   1699 ; SSE2-NEXT:    retq
   1700 ;
   1701 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
   1702 ; SSSE3:       # %bb.0:
   1703 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1704 ; SSSE3-NEXT:    retq
   1705 ;
   1706 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
   1707 ; SSE41:       # %bb.0:
   1708 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1709 ; SSE41-NEXT:    retq
   1710 ;
   1711 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
   1712 ; AVX:       # %bb.0:
   1713 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1714 ; AVX-NEXT:    retq
   1715   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
   1716   ret <8 x i16> %shuffle
   1717 }
   1718 
   1719 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input rotation. The result is lanes 5, 6, 7, 0, 1, 2, 3, 4 of
        ; %a, and %b is not referenced by the mask. Expected code is a palignr
        ; of xmm0 with itself from SSSE3 onwards; plain SSE2 copies xmm0 and
        ; combines a psrldq and a pslldq with por.
   1720 ; SSE2-LABEL: shuffle_v8i16_56701234:
   1721 ; SSE2:       # %bb.0:
   1722 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1723 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1724 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1725 ; SSE2-NEXT:    por %xmm1, %xmm0
   1726 ; SSE2-NEXT:    retq
   1727 ;
   1728 ; SSSE3-LABEL: shuffle_v8i16_56701234:
   1729 ; SSSE3:       # %bb.0:
   1730 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1731 ; SSSE3-NEXT:    retq
   1732 ;
   1733 ; SSE41-LABEL: shuffle_v8i16_56701234:
   1734 ; SSE41:       # %bb.0:
   1735 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1736 ; SSE41-NEXT:    retq
   1737 ;
   1738 ; AVX-LABEL: shuffle_v8i16_56701234:
   1739 ; AVX:       # %bb.0:
   1740 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1741 ; AVX-NEXT:    retq
   1742   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
   1743   ret <8 x i16> %shuffle
   1744 }
   1745 
   1746 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
        ; Partially undef single-input mask. The defined lanes are lane 1 = lane
        ; 6 of %a and lanes 4-6 = lanes 1-3 of %a; %b is not referenced. The
        ; expected code rotates xmm0 by ten bytes (palignr with itself, or
        ; psrldq/pslldq/por on plain SSE2), which satisfies every defined lane.
   1747 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
   1748 ; SSE2:       # %bb.0:
   1749 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1750 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1751 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1752 ; SSE2-NEXT:    por %xmm1, %xmm0
   1753 ; SSE2-NEXT:    retq
   1754 ;
   1755 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
   1756 ; SSSE3:       # %bb.0:
   1757 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1758 ; SSSE3-NEXT:    retq
   1759 ;
   1760 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
   1761 ; SSE41:       # %bb.0:
   1762 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1763 ; SSE41-NEXT:    retq
   1764 ;
   1765 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
   1766 ; AVX:       # %bb.0:
   1767 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1768 ; AVX-NEXT:    retq
   1769   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
   1770   ret <8 x i16> %shuffle
   1771 }
   1772 
   1773 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
        ; Only result lanes 4-6 are defined, as lanes 1-3 of %a; %b is not
        ; referenced. Expected code on every target is one byte shift of xmm0
        ; left by six bytes (pslldq, or vpslldq under AVX).
   1774 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
   1775 ; SSE:       # %bb.0:
   1776 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1777 ; SSE-NEXT:    retq
   1778 ;
   1779 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
   1780 ; AVX:       # %bb.0:
   1781 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1782 ; AVX-NEXT:    retq
   1783   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
   1784   ret <8 x i16> %shuffle
   1785 }
   1786 
   1787 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
        ; Result lanes 0-4 are lanes 3-7 of %b (mask indices 11-15) and lanes
        ; 5-7 are lanes 0-2 of %a. Expected code is a single palignr from SSSE3
        ; onwards; plain SSE2 uses psrldq on xmm1, pslldq on xmm0 and a por.
   1788 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
   1789 ; SSE2:       # %bb.0:
   1790 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1791 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1792 ; SSE2-NEXT:    por %xmm1, %xmm0
   1793 ; SSE2-NEXT:    retq
   1794 ;
   1795 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
   1796 ; SSSE3:       # %bb.0:
   1797 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1798 ; SSSE3-NEXT:    retq
   1799 ;
   1800 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
   1801 ; SSE41:       # %bb.0:
   1802 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1803 ; SSE41-NEXT:    retq
   1804 ;
   1805 ; AVX-LABEL: shuffle_v8i16_bcdef012:
   1806 ; AVX:       # %bb.0:
   1807 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1808 ; AVX-NEXT:    retq
   1809   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
   1810   ret <8 x i16> %shuffle
   1811 }
   1812 
   1813 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
        ; Partially undef two-input mask. The defined lanes are lanes 1-3 =
        ; lanes 4-6 of %b (indices 12-14) and lane 6 = lane 1 of %a. The
        ; expected code moves the last five words of xmm1 in front of the first
        ; three words of xmm0 (palignr, or psrldq/pslldq/por on plain SSE2),
        ; which satisfies every defined lane.
   1814 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
   1815 ; SSE2:       # %bb.0:
   1816 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1817 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1818 ; SSE2-NEXT:    por %xmm1, %xmm0
   1819 ; SSE2-NEXT:    retq
   1820 ;
   1821 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
   1822 ; SSSE3:       # %bb.0:
   1823 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1824 ; SSSE3-NEXT:    retq
   1825 ;
   1826 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
   1827 ; SSE41:       # %bb.0:
   1828 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1829 ; SSE41-NEXT:    retq
   1830 ;
   1831 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
   1832 ; AVX:       # %bb.0:
   1833 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1834 ; AVX-NEXT:    retq
   1835   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
   1836   ret <8 x i16> %shuffle
   1837 }
   1838 
   1839 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input rotation. The result is lanes 3, 4, 5, 6, 7, 0, 1, 2 of
        ; %a, and %b is not referenced by the mask. Expected code is a palignr
        ; of xmm0 with itself from SSSE3 onwards; plain SSE2 copies xmm0 and
        ; combines a psrldq and a pslldq with por.
   1840 ; SSE2-LABEL: shuffle_v8i16_34567012:
   1841 ; SSE2:       # %bb.0:
   1842 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1843 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1844 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1845 ; SSE2-NEXT:    por %xmm1, %xmm0
   1846 ; SSE2-NEXT:    retq
   1847 ;
   1848 ; SSSE3-LABEL: shuffle_v8i16_34567012:
   1849 ; SSSE3:       # %bb.0:
   1850 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1851 ; SSSE3-NEXT:    retq
   1852 ;
   1853 ; SSE41-LABEL: shuffle_v8i16_34567012:
   1854 ; SSE41:       # %bb.0:
   1855 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1856 ; SSE41-NEXT:    retq
   1857 ;
   1858 ; AVX-LABEL: shuffle_v8i16_34567012:
   1859 ; AVX:       # %bb.0:
   1860 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1861 ; AVX-NEXT:    retq
   1862   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
   1863   ret <8 x i16> %shuffle
   1864 }
   1865 
   1866 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
        ; Partially undef single-input mask. The defined lanes are lanes 1-3 =
        ; lanes 4-6 of %a and lane 6 = lane 1 of %a; %b is not referenced. The
        ; expected code rotates xmm0 by six bytes (palignr with itself, or
        ; psrldq/pslldq/por on plain SSE2), which satisfies every defined lane.
   1867 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
   1868 ; SSE2:       # %bb.0:
   1869 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1870 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1871 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1872 ; SSE2-NEXT:    por %xmm1, %xmm0
   1873 ; SSE2-NEXT:    retq
   1874 ;
   1875 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
   1876 ; SSSE3:       # %bb.0:
   1877 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1878 ; SSSE3-NEXT:    retq
   1879 ;
   1880 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
   1881 ; SSE41:       # %bb.0:
   1882 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1883 ; SSE41-NEXT:    retq
   1884 ;
   1885 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
   1886 ; AVX:       # %bb.0:
   1887 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1888 ; AVX-NEXT:    retq
   1889   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
   1890   ret <8 x i16> %shuffle
   1891 }
   1892 
   1893 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
        ; Only result lanes 1-3 are defined, as lanes 4-6 of %a; %b is not
        ; referenced. Expected code on every target is one byte shift of xmm0
        ; right by six bytes (psrldq, or vpsrldq under AVX).
   1894 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
   1895 ; SSE:       # %bb.0:
   1896 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1897 ; SSE-NEXT:    retq
   1898 ;
   1899 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
   1900 ; AVX:       # %bb.0:
   1901 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1902 ; AVX-NEXT:    retq
   1903   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
   1904   ret <8 x i16> %shuffle
   1905 }
   1906 
   1907 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
        ; Result lanes 0-4 are lanes 3-7 of %a and lanes 5-7 are lanes 0-2 of %b
        ; (mask indices 8-10). Expected code is one palignr; the two-operand SSE
        ; form writes xmm1 and therefore needs a trailing movdqa into xmm0,
        ; while the three-operand vpalignr writes xmm0 directly.
   1908 ; SSE2-LABEL: shuffle_v8i16_3456789a:
   1909 ; SSE2:       # %bb.0:
   1910 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1911 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
   1912 ; SSE2-NEXT:    por %xmm1, %xmm0
   1913 ; SSE2-NEXT:    retq
   1914 ;
   1915 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
   1916 ; SSSE3:       # %bb.0:
   1917 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1918 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1919 ; SSSE3-NEXT:    retq
   1920 ;
   1921 ; SSE41-LABEL: shuffle_v8i16_3456789a:
   1922 ; SSE41:       # %bb.0:
   1923 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1924 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1925 ; SSE41-NEXT:    retq
   1926 ;
   1927 ; AVX-LABEL: shuffle_v8i16_3456789a:
   1928 ; AVX:       # %bb.0:
   1929 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1930 ; AVX-NEXT:    retq
   1931   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   1932   ret <8 x i16> %shuffle
   1933 }
   1934 
   1935 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
        ; Partially undef two-input mask. The defined lanes are lanes 1-3 =
        ; lanes 4-6 of %a and lane 6 = lane 1 of %b (index 9). The expected code
        ; moves the last five words of xmm0 in front of the first three words of
        ; xmm1 (palignr, or psrldq/pslldq/por on plain SSE2), which satisfies
        ; every defined lane.
   1936 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
   1937 ; SSE2:       # %bb.0:
   1938 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1939 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
   1940 ; SSE2-NEXT:    por %xmm1, %xmm0
   1941 ; SSE2-NEXT:    retq
   1942 ;
   1943 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
   1944 ; SSSE3:       # %bb.0:
   1945 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1946 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1947 ; SSSE3-NEXT:    retq
   1948 ;
   1949 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
   1950 ; SSE41:       # %bb.0:
   1951 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1952 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1953 ; SSE41-NEXT:    retq
   1954 ;
   1955 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
   1956 ; AVX:       # %bb.0:
   1957 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1958 ; AVX-NEXT:    retq
   1959   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
   1960   ret <8 x i16> %shuffle
   1961 }
   1962 
   1963 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
   1964 ; SSE2-LABEL: shuffle_v8i16_56789abc:
   1965 ; SSE2:       # %bb.0:
   1966 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1967 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
   1968 ; SSE2-NEXT:    por %xmm1, %xmm0
   1969 ; SSE2-NEXT:    retq
   1970 ;
   1971 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
   1972 ; SSSE3:       # %bb.0:
   1973 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1974 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1975 ; SSSE3-NEXT:    retq
   1976 ;
   1977 ; SSE41-LABEL: shuffle_v8i16_56789abc:
   1978 ; SSE41:       # %bb.0:
   1979 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1980 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1981 ; SSE41-NEXT:    retq
   1982 ;
   1983 ; AVX-LABEL: shuffle_v8i16_56789abc:
   1984 ; AVX:       # %bb.0:
   1985 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1986 ; AVX-NEXT:    retq
          ; Indices 5..12 form a contiguous window over (%a, %b): %a[5..7] then %b[0..4].
          ; SSSE3 and later expect one (v)palignr; SSE2 expects psrldq + pslldq + por.
   1987   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
   1988   ret <8 x i16> %shuffle
   1989 }
   1990 
   1991 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
   1992 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
   1993 ; SSE2:       # %bb.0:
   1994 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1995 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
   1996 ; SSE2-NEXT:    por %xmm1, %xmm0
   1997 ; SSE2-NEXT:    retq
   1998 ;
   1999 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
   2000 ; SSSE3:       # %bb.0:
   2001 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   2002 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   2003 ; SSSE3-NEXT:    retq
   2004 ;
   2005 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
   2006 ; SSE41:       # %bb.0:
   2007 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   2008 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   2009 ; SSE41-NEXT:    retq
   2010 ;
   2011 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
   2012 ; AVX:       # %bb.0:
   2013 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   2014 ; AVX-NEXT:    retq
          ; The defined lanes (index 6 in lane 1, indices 9-11 in lanes 4-6) agree with
          ; the contiguous window 5..12 of (%a, %b), so the expected code is that
          ; rotation: one (v)palignr on SSSE3 and later, psrldq + pslldq + por on SSE2.
   2015   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   2016   ret <8 x i16> %shuffle
   2017 }
   2018 
   2019 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
   2020 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
   2021 ; SSE2:       # %bb.0:
   2022 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   2023 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
   2024 ; SSE2-NEXT:    retq
   2025 ;
   2026 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
   2027 ; SSSE3:       # %bb.0:
   2028 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   2029 ; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
   2030 ; SSSE3-NEXT:    retq
   2031 ;
   2032 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
   2033 ; SSE41:       # %bb.0:
   2034 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   2035 ; SSE41-NEXT:    retq
   2036 ;
   2037 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
   2038 ; AVX:       # %bb.0:
   2039 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   2040 ; AVX-NEXT:    retq
          ; %a[0] goes to lane 0 and %a[1] to lane 4; every other lane is undef.
          ; SSE4.1 and AVX targets expect a single (v)pmovzxwq; SSE2 and SSSE3
          ; expect pshufd + pshufhw.
   2041   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
   2042   ret <8 x i16> %shuffle
   2043 }
   2044 
   2045 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
   2046 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
   2047 ; SSE2:       # %bb.0:
   2048 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   2049 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2050 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   2051 ; SSE2-NEXT:    retq
   2052 ;
   2053 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
   2054 ; SSSE3:       # %bb.0:
   2055 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   2056 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2057 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   2058 ; SSSE3-NEXT:    retq
   2059 ;
   2060 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
   2061 ; SSE41:       # %bb.0:
   2062 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   2063 ; SSE41-NEXT:    retq
   2064 ;
   2065 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
   2066 ; AVX:       # %bb.0:
   2067 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   2068 ; AVX-NEXT:    retq
          ; Indices 9-11 and 13-15 select from the zeroinitializer operand, so lanes
          ; 1-3 and 5-7 are zero. SSE4.1 and AVX targets expect a single (v)pmovzxwq;
          ; SSE2 and SSSE3 expect pxor followed by punpcklwd + punpckldq.
   2069   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   2070   ret <8 x i16> %shuffle
   2071 }
   2072 
   2073 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
   2074 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
   2075 ; SSE2:       # %bb.0:
   2076 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   2077 ; SSE2-NEXT:    retq
   2078 ;
   2079 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
   2080 ; SSSE3:       # %bb.0:
   2081 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   2082 ; SSSE3-NEXT:    retq
   2083 ;
   2084 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
   2085 ; SSE41:       # %bb.0:
   2086 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   2087 ; SSE41-NEXT:    retq
   2088 ;
   2089 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
   2090 ; AVX:       # %bb.0:
   2091 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   2092 ; AVX-NEXT:    retq
          ; %a[0..3] land in the even lanes; the odd lanes are undef. SSE2 and SSSE3
          ; expect punpcklwd of xmm0 with itself; SSE4.1 and AVX targets expect
          ; (v)pmovzxwd.
   2093   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
   2094   ret <8 x i16> %shuffle
   2095 }
   2096 
   2097 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
   2098 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
   2099 ; SSE2:       # %bb.0:
   2100 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   2101 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2102 ; SSE2-NEXT:    retq
   2103 ;
   2104 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
   2105 ; SSSE3:       # %bb.0:
   2106 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   2107 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2108 ; SSSE3-NEXT:    retq
   2109 ;
   2110 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
   2111 ; SSE41:       # %bb.0:
   2112 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   2113 ; SSE41-NEXT:    retq
   2114 ;
   2115 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
   2116 ; AVX:       # %bb.0:
   2117 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   2118 ; AVX-NEXT:    retq
          ; %a[0..3] land in the even lanes; the odd lanes use indices 9/11/13/15 from
          ; the zeroinitializer operand and are therefore zero. SSE2 and SSSE3 expect
          ; pxor + punpcklwd; SSE4.1 and AVX targets expect (v)pmovzxwd.
   2119   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   2120   ret <8 x i16> %shuffle
   2121 }
   2122 
   2123 define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
   2124 ; SSE-LABEL: shuffle_v8i16_01100110:
   2125 ; SSE:       # %bb.0:
   2126 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2127 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2128 ; SSE-NEXT:    retq
   2129 ;
   2130 ; AVX1-LABEL: shuffle_v8i16_01100110:
   2131 ; AVX1:       # %bb.0:
   2132 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2133 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2134 ; AVX1-NEXT:    retq
   2135 ;
   2136 ; AVX2-SLOW-LABEL: shuffle_v8i16_01100110:
   2137 ; AVX2-SLOW:       # %bb.0:
   2138 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2139 ; AVX2-SLOW-NEXT:    vpbroadcastq %xmm0, %xmm0
   2140 ; AVX2-SLOW-NEXT:    retq
   2141 ;
   2142 ; AVX2-FAST-LABEL: shuffle_v8i16_01100110:
   2143 ; AVX2-FAST:       # %bb.0:
   2144 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
   2145 ; AVX2-FAST-NEXT:    retq
   2146 ;
   2147 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110:
   2148 ; AVX512VL-SLOW:       # %bb.0:
   2149 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2150 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2151 ; AVX512VL-SLOW-NEXT:    retq
   2152 ;
   2153 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110:
   2154 ; AVX512VL-FAST:       # %bb.0:
   2155 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
   2156 ; AVX512VL-FAST-NEXT:    retq
          ; Every index is below 8, so only %a is read. The 0,1,1,0 pattern repeats in
          ; both halves. The -FAST prefixes (+fast-variable-shuffle) expect a single
          ; vpshufb; the other configurations expect (v)pshuflw followed by a copy of
          ; the low 64 bits into the high half ((v)pshufd, or vpbroadcastq on AVX2-SLOW).
   2157   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
   2158   ret <8 x i16> %shuffle
   2159 }
   2160 
   2161 define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
   2162 ; SSE-LABEL: shuffle_v8i16_01u0u110:
   2163 ; SSE:       # %bb.0:
   2164 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2165 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2166 ; SSE-NEXT:    retq
   2167 ;
   2168 ; AVX1-LABEL: shuffle_v8i16_01u0u110:
   2169 ; AVX1:       # %bb.0:
   2170 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2171 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2172 ; AVX1-NEXT:    retq
   2173 ;
   2174 ; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110:
   2175 ; AVX2-SLOW:       # %bb.0:
   2176 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2177 ; AVX2-SLOW-NEXT:    vpbroadcastq %xmm0, %xmm0
   2178 ; AVX2-SLOW-NEXT:    retq
   2179 ;
   2180 ; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
   2181 ; AVX2-FAST:       # %bb.0:
   2182 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
   2183 ; AVX2-FAST-NEXT:    retq
   2184 ;
   2185 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
   2186 ; AVX512VL-SLOW:       # %bb.0:
   2187 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
   2188 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2189 ; AVX512VL-SLOW-NEXT:    retq
   2190 ;
   2191 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
   2192 ; AVX512VL-FAST:       # %bb.0:
   2193 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
   2194 ; AVX512VL-FAST-NEXT:    retq
          ; Lanes 2 and 4 are undef; the defined lanes fit the pattern 0,1,1,0 repeated
          ; in both halves, and only %a is read. The -FAST prefixes expect a single
          ; vpshufb; the others expect (v)pshuflw plus (v)pshufd or vpbroadcastq.
   2195   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
   2196   ret <8 x i16> %shuffle
   2197 }
   2198 
   2199 define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
   2200 ; SSE-LABEL: shuffle_v8i16_467uu675:
   2201 ; SSE:       # %bb.0:
   2202 ; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
   2203 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
   2204 ; SSE-NEXT:    retq
   2205 ;
   2206 ; AVX1-LABEL: shuffle_v8i16_467uu675:
   2207 ; AVX1:       # %bb.0:
   2208 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
   2209 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
   2210 ; AVX1-NEXT:    retq
   2211 ;
   2212 ; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
   2213 ; AVX2-SLOW:       # %bb.0:
   2214 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
   2215 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
   2216 ; AVX2-SLOW-NEXT:    retq
   2217 ;
   2218 ; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
   2219 ; AVX2-FAST:       # %bb.0:
   2220 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
   2221 ; AVX2-FAST-NEXT:    retq
   2222 ;
   2223 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
   2224 ; AVX512VL-SLOW:       # %bb.0:
   2225 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
   2226 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
   2227 ; AVX512VL-SLOW-NEXT:    retq
   2228 ;
   2229 ; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
   2230 ; AVX512VL-FAST:       # %bb.0:
   2231 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
   2232 ; AVX512VL-FAST-NEXT:    retq
          ; Only %a[4..7] are read; lanes 3 and 4 are undef. The -FAST prefixes expect
          ; a single vpshufb; the others expect (v)pshufhw followed by (v)pshufd.
   2233   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
   2234   ret <8 x i16> %shuffle
   2235 }
   2236 
   2237 ;
   2238 ; Shuffle to logical bit shifts
   2239 ;
   2240 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
   2241 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
   2242 ; SSE:       # %bb.0:
   2243 ; SSE-NEXT:    pslld $16, %xmm0
   2244 ; SSE-NEXT:    retq
   2245 ;
   2246 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
   2247 ; AVX:       # %bb.0:
   2248 ; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
   2249 ; AVX-NEXT:    retq
          ; Index 8 is element 0 of the zeroinitializer operand, i.e. zero. Each 32-bit
          ; pair becomes (0, even element of %a), which is expected to lower to a
          ; 16-bit left shift of every dword: (v)pslld $16.
   2250   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
   2251   ret <8 x i16> %shuffle
   2252 }
   2253 
   2254 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
   2255 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
   2256 ; SSE:       # %bb.0:
   2257 ; SSE-NEXT:    psllq $48, %xmm0
   2258 ; SSE-NEXT:    retq
   2259 ;
   2260 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
   2261 ; AVX:       # %bb.0:
   2262 ; AVX-NEXT:    vpsllq $48, %xmm0, %xmm0
   2263 ; AVX-NEXT:    retq
          ; Index 8 reads zero from the zeroinitializer operand. Each 64-bit group is
          ; (0, 0, 0, lowest element of that group in %a), which is expected to lower
          ; to a 48-bit left shift of every qword: (v)psllq $48.
   2264   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
   2265   ret <8 x i16> %shuffle
   2266 }
   2267 
   2268 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
   2269 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
   2270 ; SSE:       # %bb.0:
   2271 ; SSE-NEXT:    psllq $32, %xmm0
   2272 ; SSE-NEXT:    retq
   2273 ;
   2274 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
   2275 ; AVX:       # %bb.0:
   2276 ; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
   2277 ; AVX-NEXT:    retq
          ; Index 8 reads zero from the zeroinitializer operand; lanes 5 and 7 are
          ; undef ("X" in the name). The defined lanes match a 32-bit left shift of
          ; every qword, so the expected code is (v)psllq $32.
   2278   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
   2279   ret <8 x i16> %shuffle
   2280 }
   2281 
   2282 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
   2283 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
   2284 ; SSE:       # %bb.0:
   2285 ; SSE-NEXT:    psllq $16, %xmm0
   2286 ; SSE-NEXT:    retq
   2287 ;
   2288 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
   2289 ; AVX:       # %bb.0:
   2290 ; AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
   2291 ; AVX-NEXT:    retq
          ; Index 8 reads zero from the zeroinitializer operand; lane 2 is undef. The
          ; defined lanes match a 16-bit left shift of every qword, so the expected
          ; code is (v)psllq $16.
   2292   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
   2293   ret <8 x i16> %shuffle
   2294 }
   2295 
   2296 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
   2297 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
   2298 ; SSE:       # %bb.0:
   2299 ; SSE-NEXT:    psrld $16, %xmm0
   2300 ; SSE-NEXT:    retq
   2301 ;
   2302 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
   2303 ; AVX:       # %bb.0:
   2304 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
   2305 ; AVX-NEXT:    retq
          ; Index 8 reads zero from the zeroinitializer operand; lane 4 is undef. Each
          ; 32-bit pair is (odd element of %a, 0), matching a 16-bit logical right
          ; shift of every dword: (v)psrld $16.
   2306   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
   2307   ret <8 x i16> %shuffle
   2308 }
   2309 
   2310 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
   2311 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
   2312 ; SSE:       # %bb.0:
   2313 ; SSE-NEXT:    psrlq $16, %xmm0
   2314 ; SSE-NEXT:    retq
   2315 ;
   2316 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
   2317 ; AVX:       # %bb.0:
   2318 ; AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
   2319 ; AVX-NEXT:    retq
          ; Index 8 reads zero from the zeroinitializer operand; lane 1 is undef. The
          ; defined lanes match a 16-bit logical right shift of every qword, so the
          ; expected code is (v)psrlq $16.
   2320   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
   2321   ret <8 x i16> %shuffle
   2322 }
   2323 
   2324 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
   2325 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
   2326 ; SSE:       # %bb.0:
   2327 ; SSE-NEXT:    psrlq $32, %xmm0
   2328 ; SSE-NEXT:    retq
   2329 ;
   2330 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
   2331 ; AVX:       # %bb.0:
   2332 ; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
   2333 ; AVX-NEXT:    retq
          ; Index 8 reads zero from the zeroinitializer operand. Each 64-bit group
          ; keeps its upper two elements of %a in the low half and zeroes the rest,
          ; matching a 32-bit logical right shift of every qword: (v)psrlq $32.
   2334   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
   2335   ret <8 x i16> %shuffle
   2336 }
   2337 
   2338 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
   2339 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
   2340 ; SSE:       # %bb.0:
   2341 ; SSE-NEXT:    psrlq $48, %xmm0
   2342 ; SSE-NEXT:    retq
   2343 ;
   2344 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
   2345 ; AVX:       # %bb.0:
   2346 ; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
   2347 ; AVX-NEXT:    retq
          ; Index 8 reads zero from the zeroinitializer operand; lanes 2-4 are undef.
          ; The defined lanes (%a[3] in lane 0, zeros in lanes 1 and 5-7) are
          ; compatible with a 48-bit logical right shift of every qword: (v)psrlq $48.
   2348   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
   2349   ret <8 x i16> %shuffle
   2350 }
   2351 
   2352 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
   2353 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
   2354 ; SSE:       # %bb.0:
   2355 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
   2356 ; SSE-NEXT:    retq
   2357 ;
   2358 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
   2359 ; AVX:       # %bb.0:
   2360 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
   2361 ; AVX-NEXT:    retq
          ; Lanes 0-3 keep %a in place (lane 2 is undef); lanes 4-7 are zero (index 8)
          ; or undef. That is "keep the low 64 bits, clear the high 64 bits", which
          ; is expected to lower to a register-to-register (v)movq.
   2362   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
   2363   ret <8 x i16> %shuffle
   2364 }
   2365 
   2366 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
   2367 ; SSE2-LABEL: shuffle_v8i16_0z234567:
   2368 ; SSE2:       # %bb.0:
   2369 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   2370 ; SSE2-NEXT:    retq
   2371 ;
   2372 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
   2373 ; SSSE3:       # %bb.0:
   2374 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   2375 ; SSSE3-NEXT:    retq
   2376 ;
   2377 ; SSE41-LABEL: shuffle_v8i16_0z234567:
   2378 ; SSE41:       # %bb.0:
   2379 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   2380 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   2381 ; SSE41-NEXT:    retq
   2382 ;
   2383 ; AVX-LABEL: shuffle_v8i16_0z234567:
   2384 ; AVX:       # %bb.0:
   2385 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   2386 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   2387 ; AVX-NEXT:    retq
          ; Identity shuffle of %a except lane 1, whose index 9 selects from the
          ; zeroinitializer operand. SSE2 and SSSE3 expect an andps with a RIP-relative
          ; memory operand; SSE4.1 and AVX targets expect a zeroed register plus
          ; (v)pblendw.
   2388   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2389   ret <8 x i16> %shuffle
   2390 }
   2391 
   2392 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
   2393 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
   2394 ; SSE2:       # %bb.0:
   2395 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   2396 ; SSE2-NEXT:    retq
   2397 ;
   2398 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
   2399 ; SSSE3:       # %bb.0:
   2400 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   2401 ; SSSE3-NEXT:    retq
   2402 ;
   2403 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
   2404 ; SSE41:       # %bb.0:
   2405 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   2406 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
   2407 ; SSE41-NEXT:    retq
   2408 ;
   2409 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
   2410 ; AVX:       # %bb.0:
   2411 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   2412 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
   2413 ; AVX-NEXT:    retq
          ; Lanes 0, 5 and 7 keep %a in place; lanes 1-4 and 6 use index 8, which reads
          ; zero from the zeroinitializer operand. SSE2 and SSSE3 expect an andps with
          ; a RIP-relative memory operand; SSE4.1 and AVX targets expect a zeroed
          ; register plus (v)pblendw.
   2414   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
   2415   ret <8 x i16> %shuffle
   2416 }
   2417 
   2418 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
   2419 ; SSE2-LABEL: shuffle_v8i16_0123456z:
   2420 ; SSE2:       # %bb.0:
   2421 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   2422 ; SSE2-NEXT:    retq
   2423 ;
   2424 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
   2425 ; SSSE3:       # %bb.0:
   2426 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   2427 ; SSSE3-NEXT:    retq
   2428 ;
   2429 ; SSE41-LABEL: shuffle_v8i16_0123456z:
   2430 ; SSE41:       # %bb.0:
   2431 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   2432 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2433 ; SSE41-NEXT:    retq
   2434 ;
   2435 ; AVX-LABEL: shuffle_v8i16_0123456z:
   2436 ; AVX:       # %bb.0:
   2437 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   2438 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2439 ; AVX-NEXT:    retq
          ; Lanes 0-6 pass %a through unchanged; index 15 takes lane 7 from the
          ; zeroinitializer operand, so only the top lane is cleared, as the test name
          ; states. SSE2 and SSSE3 expect an andps with a RIP-relative memory operand;
          ; SSE4.1 and AVX targets expect a zeroed register plus (v)pblendw.
          ; Regenerate the assertions with utils/update_llc_test_checks.py if llc
          ; output differs.
   2440   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   2441   ret <8 x i16> %shuffle
   2442 }
   2443 
   2444 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
   2445 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
   2446 ; SSE:       # %bb.0:
   2447 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2448 ; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   2449 ; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   2450 ; SSE-NEXT:    movdqa %xmm1, %xmm0
   2451 ; SSE-NEXT:    retq
   2452 ;
   2453 ; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
   2454 ; AVX:       # %bb.0:
   2455 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2456 ; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   2457 ; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   2458 ; AVX-NEXT:    retq
          ; Two-input shuffle: indices 15 and 12 read %b[7] and %b[4]; indices 3 and 5
          ; read %a[3] and %a[5]; lanes 1, 3 and 7 are undef. The expected sequence is
          ; (v)pslldq on xmm0, (v)pshufhw on xmm1, then (v)punpckhdq to interleave the
          ; high dwords of the two registers.
   2459   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
   2460   ret <8 x i16> %shuffle
   2461 }
   2462 
   2463 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
   2464 ; SSE-LABEL: shuffle_v8i16_8012345u:
   2465 ; SSE:       # %bb.0:
   2466 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2467 ; SSE-NEXT:    retq
   2468 ;
   2469 ; AVX-LABEL: shuffle_v8i16_8012345u:
   2470 ; AVX:       # %bb.0:
   2471 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2472 ; AVX-NEXT:    retq
          ; Index 8 is element 0 of the zeroinitializer operand, so lane 0 is zero;
          ; lanes 1-6 hold %a[0..5] and lane 7 is undef. The expected lowering is a
          ; two-byte (v)pslldq.
   2473   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
   2474 
   2475   ret <8 x i16> %shuffle
   2476 }
   2477 
   2478 define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
   2479 ; SSE2-LABEL: mask_v8i16_012345ef:
   2480 ; SSE2:       # %bb.0:
   2481 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   2482 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   2483 ; SSE2-NEXT:    movaps %xmm1, %xmm0
   2484 ; SSE2-NEXT:    retq
   2485 ;
   2486 ; SSSE3-LABEL: mask_v8i16_012345ef:
   2487 ; SSSE3:       # %bb.0:
   2488 ; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
   2489 ; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
   2490 ; SSSE3-NEXT:    movaps %xmm1, %xmm0
   2491 ; SSSE3-NEXT:    retq
   2492 ;
   2493 ; SSE41-LABEL: mask_v8i16_012345ef:
   2494 ; SSE41:       # %bb.0:
   2495 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
   2496 ; SSE41-NEXT:    retq
   2497 ;
   2498 ; AVX-LABEL: mask_v8i16_012345ef:
   2499 ; AVX:       # %bb.0:
   2500 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
   2501 ; AVX-NEXT:    retq
          ; A blend written as and/and/or on <2 x i64> rather than as a shufflevector.
          ; %3 keeps only the top 32 bits of %a (-4294967296 = 0xFFFFFFFF00000000 in
          ; the high i64); %4 keeps the remaining low 96 bits of %b. SSE4.1 and AVX
          ; targets expect this to become a single (v)blendps; SSE2 and SSSE3 expect
          ; two shufps.
          ; NOTE(review): the name reads as "%a lanes 0-5, %b lanes 6-7", but the IR
          ; takes the low lanes from %b and the top lanes from %a - confirm the
          ; intended naming.
   2502   %1 = bitcast <8 x i16> %a to <2 x i64>
   2503   %2 = bitcast <8 x i16> %b to <2 x i64>
   2504   %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
   2505   %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
   2506   %5 = or <2 x i64> %4, %3
   2507   %6 = bitcast <2 x i64> %5 to <8 x i16>
   2508   ret <8 x i16> %6
   2509 }
   2510 
   2511 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
   2512 ; SSE-LABEL: insert_dup_mem_v8i16_i32:
   2513 ; SSE:       # %bb.0:
   2514 ; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2515 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   2516 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2517 ; SSE-NEXT:    retq
   2518 ;
   2519 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
   2520 ; AVX1:       # %bb.0:
   2521 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2522 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   2523 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2524 ; AVX1-NEXT:    retq
   2525 ;
   2526 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
   2527 ; AVX2OR512VL:       # %bb.0:
   2528 ; AVX2OR512VL-NEXT:    vpbroadcastw (%rdi), %xmm0
   2529 ; AVX2OR512VL-NEXT:    retq
          ; Loads an i32, places it in lane 0 of an otherwise-zero <4 x i32>,
          ; reinterprets that as <8 x i16> and splats i16 element 0 (the all-zero mask).
          ; AVX2 and AVX512VL targets expect the whole sequence to fold into one
          ; vpbroadcastw from (%rdi); SSE and AVX1 expect (v)movd + (v)pshuflw +
          ; (v)pshufd.
   2530   %tmp = load i32, i32* %ptr, align 4
   2531   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   2532   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   2533   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
   2534   ret <8 x i16> %tmp3
   2535 }
   2536 
   2537 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
   2538 ; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
   2539 ; SSE:       # %bb.0:
   2540 ; SSE-NEXT:    movswl (%rdi), %eax
   2541 ; SSE-NEXT:    movd %eax, %xmm0
   2542 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   2543 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2544 ; SSE-NEXT:    retq
   2545 ;
   2546 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
   2547 ; AVX1:       # %bb.0:
   2548 ; AVX1-NEXT:    movswl (%rdi), %eax
   2549 ; AVX1-NEXT:    vmovd %eax, %xmm0
   2550 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   2551 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2552 ; AVX1-NEXT:    retq
   2553 ;
   2554 ; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
   2555 ; AVX2:       # %bb.0:
   2556 ; AVX2-NEXT:    movswl (%rdi), %eax
   2557 ; AVX2-NEXT:    vmovd %eax, %xmm0
   2558 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
   2559 ; AVX2-NEXT:    retq
   2560 ;
   2561 ; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
   2562 ; AVX512VL:       # %bb.0:
   2563 ; AVX512VL-NEXT:    movswl (%rdi), %eax
   2564 ; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
   2565 ; AVX512VL-NEXT:    retq
          ; Loads an i16, sign-extends it to i32, places it in lane 0 of an
          ; otherwise-zero <4 x i32>, reinterprets that as <8 x i16> and splats i16
          ; element 0. Every configuration is expected to keep the sign-extending
          ; scalar load (movswl); AVX2 then broadcasts from an xmm register and
          ; AVX512VL broadcasts straight from %eax.
   2566   %tmp = load i16, i16* %ptr, align 2
   2567   %tmp1 = sext i16 %tmp to i32
   2568   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   2569   %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
   2570   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
   2571   ret <8 x i16> %tmp4
   2572 }
   2573 
   2574 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
   2575 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2576 ; SSE:       # %bb.0:
   2577 ; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2578 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   2579 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2580 ; SSE-NEXT:    retq
   2581 ;
   2582 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2583 ; AVX1:       # %bb.0:
   2584 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2585 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   2586 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2587 ; AVX1-NEXT:    retq
   2588 ;
   2589 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2590 ; AVX2OR512VL:       # %bb.0:
   2591 ; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %xmm0
   2592 ; AVX2OR512VL-NEXT:    retq
          ; Loads an i32 into lane 0 of an otherwise-zero <4 x i32>, reinterprets it as
          ; <8 x i16> and splats i16 element 1, i.e. the upper half of the loaded
          ; value. AVX2 and AVX512VL targets expect this to fold into a vpbroadcastw
          ; from offset 2 of the pointer; SSE and AVX1 expect (v)movd + (v)pshuflw +
          ; (v)pshufd.
   2593   %tmp = load i32, i32* %ptr, align 4
   2594   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   2595   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   2596   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   2597   ret <8 x i16> %tmp3
   2598 }
   2599 
   2600 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
   2601 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2602 ; SSE2:       # %bb.0:
   2603 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2604 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
   2605 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2606 ; SSE2-NEXT:    retq
   2607 ;
   2608 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2609 ; SSSE3:       # %bb.0:
   2610 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2611 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2612 ; SSSE3-NEXT:    retq
   2613 ;
   2614 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2615 ; SSE41:       # %bb.0:
   2616 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2617 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2618 ; SSE41-NEXT:    retq
   2619 ;
   2620 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2621 ; AVX1:       # %bb.0:
   2622 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2623 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2624 ; AVX1-NEXT:    retq
   2625 ;
   2626 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2627 ; AVX2OR512VL:       # %bb.0:
   2628 ; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %xmm0
   2629 ; AVX2OR512VL-NEXT:    retq
          ; Loads an i32 into lane 1 of an otherwise-zero <4 x i32> (i16 elements 2 and
          ; 3 after the bitcast) and splats i16 element 3, i.e. the upper half of the
          ; loaded value. AVX2 and AVX512VL targets expect a vpbroadcastw from offset 2
          ; of the pointer; SSSE3, SSE4.1 and AVX1 expect (v)movd + (v)pshufb; SSE2
          ; expects movd + pshuflw + pshufd.
   2630   %tmp = load i32, i32* %ptr, align 4
   2631   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
   2632   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   2633   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   2634   ret <8 x i16> %tmp3
   2635 }
   2636 
   2637 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
   2638 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2639 ; SSE:       # %bb.0:
   2640 ; SSE-NEXT:    movswl (%rdi), %eax
   2641 ; SSE-NEXT:    movd %eax, %xmm0
   2642 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   2643 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2644 ; SSE-NEXT:    retq
   2645 ;
   2646 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2647 ; AVX1:       # %bb.0:
   2648 ; AVX1-NEXT:    movswl (%rdi), %eax
   2649 ; AVX1-NEXT:    vmovd %eax, %xmm0
   2650 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   2651 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2652 ; AVX1-NEXT:    retq
   2653 ;
   2654 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2655 ; AVX2:       # %bb.0:
   2656 ; AVX2-NEXT:    movswl (%rdi), %eax
   2657 ; AVX2-NEXT:    shrl $16, %eax
   2658 ; AVX2-NEXT:    vmovd %eax, %xmm0
   2659 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
   2660 ; AVX2-NEXT:    retq
   2661 ;
   2662 ; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2663 ; AVX512VL:       # %bb.0:
   2664 ; AVX512VL-NEXT:    movswl (%rdi), %eax
   2665 ; AVX512VL-NEXT:    shrl $16, %eax
   2666 ; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
   2667 ; AVX512VL-NEXT:    retq
          ; Loads an i16, sign-extends it to i32, places it in lane 0 of an
          ; otherwise-zero <4 x i32>, reinterprets that as <8 x i16> and splats i16
          ; element 1, i.e. the upper 16 bits of the sign-extended value. AVX2 and
          ; AVX512VL targets expect movswl followed by shrl $16 before the broadcast;
          ; SSE and AVX1 expect movswl + (v)movd + (v)pshuflw + (v)pshufd.
   2668   %tmp = load i16, i16* %ptr, align 2
   2669   %tmp1 = sext i16 %tmp to i32
   2670   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   2671   %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
   2672   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   2673   ret <8 x i16> %tmp4
   2674 }
   2675 
   ; Test: splat of i16 element 3 taken from a sign-extended i16 load.
   ; The IR loads an i16 from %ptr, sign-extends it to i32, inserts it into lane 1
   ; of an all-zero <4 x i32>, reinterprets that vector as <8 x i16>, and then
   ; broadcasts element 3 to all eight lanes. With the x86_64 triple used by the
   ; RUN lines, element 3 is the upper half of i32 lane 1, so the splatted value
   ; is the sign-extension bits of the loaded i16. The expected code never needs
   ; to place the scalar in lane 1; it moves the value into the low lane
   ; (movd/vmovd) and selects its upper 16 bits (pshuflw index 1, pshufb bytes
   ; 2-3, or 'shrl $16' before the broadcast).
   ; The assertions are autogenerated (see the note at the top of the file);
   ; regenerate them with utils/update_llc_test_checks.py rather than editing
   ; them by hand.
   2676 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
   2677 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2678 ; SSE2:       # %bb.0:
   2679 ; SSE2-NEXT:    movswl (%rdi), %eax
   2680 ; SSE2-NEXT:    movd %eax, %xmm0
   2681 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
   2682 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2683 ; SSE2-NEXT:    retq
   2684 ;
   2685 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2686 ; SSSE3:       # %bb.0:
   2687 ; SSSE3-NEXT:    movswl (%rdi), %eax
   2688 ; SSSE3-NEXT:    movd %eax, %xmm0
   2689 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2690 ; SSSE3-NEXT:    retq
   2691 ;
   2692 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2693 ; SSE41:       # %bb.0:
   2694 ; SSE41-NEXT:    movswl (%rdi), %eax
   2695 ; SSE41-NEXT:    movd %eax, %xmm0
   2696 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2697 ; SSE41-NEXT:    retq
   2698 ;
   2699 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2700 ; AVX1:       # %bb.0:
   2701 ; AVX1-NEXT:    movswl (%rdi), %eax
   2702 ; AVX1-NEXT:    vmovd %eax, %xmm0
   2703 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2704 ; AVX1-NEXT:    retq
   2705 ;
   2706 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2707 ; AVX2:       # %bb.0:
   2708 ; AVX2-NEXT:    movswl (%rdi), %eax
   2709 ; AVX2-NEXT:    shrl $16, %eax
   2710 ; AVX2-NEXT:    vmovd %eax, %xmm0
   2711 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
   2712 ; AVX2-NEXT:    retq
   2713 ;
   2714 ; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2715 ; AVX512VL:       # %bb.0:
   2716 ; AVX512VL-NEXT:    movswl (%rdi), %eax
   2717 ; AVX512VL-NEXT:    shrl $16, %eax
   2718 ; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
   2719 ; AVX512VL-NEXT:    retq
   ; Sign-extended scalar goes into i32 lane 1; all other lanes stay zero.
   2720   %tmp = load i16, i16* %ptr, align 2
   2721   %tmp1 = sext i16 %tmp to i32
   2722   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
   2723   %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
   ; Every mask index is 3, so only i16 element 3 of %tmp3 is observed.
   2724   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   2725   ret <8 x i16> %tmp4
   2726 }
   2727