Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      7 
      8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      9 target triple = "x86_64-unknown-unknown"
     10 
     11 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input shuffle of %a (mask never indexes %b): word pairs (0,1) and
        ; (2,3) are each duplicated. The checks expect one dword shuffle with
        ; selector [0,0,1,1] on every tested feature level.
     12 ; SSE-LABEL: shuffle_v8i16_01012323:
     13 ; SSE:       # BB#0:
     14 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
     15 ; SSE-NEXT:    retq
     16 ;
     17 ; AVX-LABEL: shuffle_v8i16_01012323:
     18 ; AVX:       # BB#0:
     19 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
     20 ; AVX-NEXT:    retq
     21   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
     22   ret <8 x i16> %shuffle
     23 }
     24 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
        ; Reverses the order of the four adjacent word pairs of %a while keeping
        ; the two words inside each pair in order. The checks expect a single
        ; dword shuffle with selector [3,2,1,0].
     25 ; SSE-LABEL: shuffle_v8i16_67452301:
     26 ; SSE:       # BB#0:
     27 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
     28 ; SSE-NEXT:    retq
     29 ;
     30 ; AVX-LABEL: shuffle_v8i16_67452301:
     31 ; AVX:       # BB#0:
     32 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
     33 ; AVX-NEXT:    retq
     34   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
     35   ret <8 x i16> %shuffle
     36 }
     37 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle: the upper four words of %a (indices 4-7) followed by
        ; the lower four words of %b (indices 8-11). Expected lowering differs by
        ; feature level: shufpd with plain SSE2, palignr into xmm1 plus a movdqa
        ; copy back to xmm0 with SSSE3 and SSE4.1, and one three-operand vpalignr
        ; with AVX.
     38 ; SSE2-LABEL: shuffle_v8i16_456789AB:
     39 ; SSE2:       # BB#0:
     40 ; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
     41 ; SSE2-NEXT:    retq
     42 ;
     43 ; SSSE3-LABEL: shuffle_v8i16_456789AB:
     44 ; SSSE3:       # BB#0:
     45 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
     46 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
     47 ; SSSE3-NEXT:    retq
     48 ;
     49 ; SSE41-LABEL: shuffle_v8i16_456789AB:
     50 ; SSE41:       # BB#0:
     51 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
     52 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
     53 ; SSE41-NEXT:    retq
     54 ;
     55 ; AVX-LABEL: shuffle_v8i16_456789AB:
     56 ; AVX:       # BB#0:
     57 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
     58 ; AVX-NEXT:    retq
     59   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
     60   ret <8 x i16> %shuffle
     61 }
     62 
     63 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
        ; Splat of word 0 of %a into all eight lanes. Expected lowering: a
        ; pshufd + pshuflw + pshufhw sequence with plain SSE2, a single byte
        ; shuffle (pshufb / vpshufb) with SSSE3, SSE4.1 and AVX1, and a single
        ; vpbroadcastw with AVX2.
     64 ; SSE2-LABEL: shuffle_v8i16_00000000:
     65 ; SSE2:       # BB#0:
     66 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
     67 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     68 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
     69 ; SSE2-NEXT:    retq
     70 ;
     71 ; SSSE3-LABEL: shuffle_v8i16_00000000:
     72 ; SSSE3:       # BB#0:
     73 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     74 ; SSSE3-NEXT:    retq
     75 ;
     76 ; SSE41-LABEL: shuffle_v8i16_00000000:
     77 ; SSE41:       # BB#0:
     78 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     79 ; SSE41-NEXT:    retq
     80 ;
     81 ; AVX1-LABEL: shuffle_v8i16_00000000:
     82 ; AVX1:       # BB#0:
     83 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     84 ; AVX1-NEXT:    retq
     85 ;
     86 ; AVX2-LABEL: shuffle_v8i16_00000000:
     87 ; AVX2:       # BB#0:
     88 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
     89 ; AVX2-NEXT:    retq
     90   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     91   ret <8 x i16> %shuffle
     92 }
     93 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
        ; Splats word 0 of %a across the low four lanes and word 4 across the
        ; high four lanes. No data crosses the 64-bit halves, so the checks
        ; expect just a low-word shuffle followed by a high-word shuffle.
     94 ; SSE-LABEL: shuffle_v8i16_00004444:
     95 ; SSE:       # BB#0:
     96 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     97 ; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
     98 ; SSE-NEXT:    retq
     99 ;
    100 ; AVX-LABEL: shuffle_v8i16_00004444:
    101 ; AVX:       # BB#0:
    102 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    103 ; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    104 ; AVX-NEXT:    retq
    105   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    106   ret <8 x i16> %shuffle
    107 }
    108 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
        ; Odd result lanes take words 0-3 of %a in order; even lanes are undef.
        ; The checks expect the undef lanes to be satisfied by unpacking the low
        ; words of xmm0 with itself (punpcklwd / vpunpcklwd).
    109 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
    110 ; SSE:       # BB#0:
    111 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    112 ; SSE-NEXT:    retq
    113 ;
    114 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
    115 ; AVX:       # BB#0:
    116 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    117 ; AVX-NEXT:    retq
    118   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
    119   ret <8 x i16> %shuffle
    120 }
    121 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
        ; Odd result lanes take words 4-7 of %a in order; even lanes are undef.
        ; The checks expect the undef lanes to be satisfied by unpacking the high
        ; words of xmm0 with itself (punpckhwd / vpunpckhwd).
    122 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
    123 ; SSE:       # BB#0:
    124 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    125 ; SSE-NEXT:    retq
    126 ;
    127 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
    128 ; AVX:       # BB#0:
    129 ; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    130 ; AVX-NEXT:    retq
    131   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
    132   ret <8 x i16> %shuffle
    133 }
    134 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
        ; Low half of %a is permuted to words 3,1,2,0; the two word pairs of the
        ; high half are swapped (6,7,4,5). The checks expect a low-word shuffle
        ; for the first part and a dword shuffle [0,1,3,2] for the pair swap.
    135 ; SSE-LABEL: shuffle_v8i16_31206745:
    136 ; SSE:       # BB#0:
    137 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    138 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    139 ; SSE-NEXT:    retq
    140 ;
    141 ; AVX-LABEL: shuffle_v8i16_31206745:
    142 ; AVX:       # BB#0:
    143 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    144 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    145 ; AVX-NEXT:    retq
    146   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
    147   ret <8 x i16> %shuffle
    148 }
    149 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
        ; Splats word 4 of %a across the low four lanes and word 0 across the
        ; high four lanes, so each splat source lives in the opposite half.
        ; Plain SSE2 is expected to need pshufd + pshuflw + pshufhw; SSSE3 and
        ; later are expected to use a single byte shuffle.
    150 ; SSE2-LABEL: shuffle_v8i16_44440000:
    151 ; SSE2:       # BB#0:
    152 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
    153 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    154 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    155 ; SSE2-NEXT:    retq
    156 ;
    157 ; SSSE3-LABEL: shuffle_v8i16_44440000:
    158 ; SSSE3:       # BB#0:
    159 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
    160 ; SSSE3-NEXT:    retq
    161 ;
    162 ; SSE41-LABEL: shuffle_v8i16_44440000:
    163 ; SSE41:       # BB#0:
    164 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
    165 ; SSE41-NEXT:    retq
    166 ;
    167 ; AVX-LABEL: shuffle_v8i16_44440000:
    168 ; AVX:       # BB#0:
    169 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
    170 ; AVX-NEXT:    retq
    171   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
    172   ret <8 x i16> %shuffle
    173 }
    174 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
        ; Swaps the two word pairs inside each 64-bit half of %a. Because words
        ; move only as aligned pairs, the checks expect a single dword shuffle
        ; with selector [1,0,3,2].
    175 ; SSE-LABEL: shuffle_v8i16_23016745:
    176 ; SSE:       # BB#0:
    177 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
    178 ; SSE-NEXT:    retq
    179 ;
    180 ; AVX-LABEL: shuffle_v8i16_23016745:
    181 ; AVX:       # BB#0:
    182 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
    183 ; AVX-NEXT:    retq
    184   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
    185   ret <8 x i16> %shuffle
    186 }
    187 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
        ; Low half becomes words 2,3,0,2 of %a (lane 3 repeats word 2); the high
        ; half has its two word pairs swapped. The checks expect a low-word
        ; shuffle followed by a dword shuffle [0,1,3,2].
    188 ; SSE-LABEL: shuffle_v8i16_23026745:
    189 ; SSE:       # BB#0:
    190 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
    191 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    192 ; SSE-NEXT:    retq
    193 ;
    194 ; AVX-LABEL: shuffle_v8i16_23026745:
    195 ; AVX:       # BB#0:
    196 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
    197 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
    198 ; AVX-NEXT:    retq
    199   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
    200   ret <8 x i16> %shuffle
    201 }
    202 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
        ; Low half has its two word pairs swapped (2,3,0,1); the high half
        ; becomes words 6,7,4,7 (lane 7 keeps word 7). The checks expect a dword
        ; shuffle [1,0,2,3] for the low half, then a high-word shuffle.
    203 ; SSE-LABEL: shuffle_v8i16_23016747:
    204 ; SSE:       # BB#0:
    205 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    206 ; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
    207 ; SSE-NEXT:    retq
    208 ;
    209 ; AVX-LABEL: shuffle_v8i16_23016747:
    210 ; AVX:       # BB#0:
    211 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
    212 ; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
    213 ; AVX-NEXT:    retq
    214   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
    215   ret <8 x i16> %shuffle
    216 }
    217 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
        ; Exchanges the two 64-bit halves of %a and permutes the words inside
        ; each half (7,5,6,4 then 3,1,2,0). Plain SSE2 is expected to swap the
        ; halves with pshufd and then fix each half with pshuflw / pshufhw;
        ; SSSE3 and later are expected to use a single byte shuffle.
    218 ; SSE2-LABEL: shuffle_v8i16_75643120:
    219 ; SSE2:       # BB#0:
    220 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    221 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
    222 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    223 ; SSE2-NEXT:    retq
    224 ;
    225 ; SSSE3-LABEL: shuffle_v8i16_75643120:
    226 ; SSSE3:       # BB#0:
    227 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
    228 ; SSSE3-NEXT:    retq
    229 ;
    230 ; SSE41-LABEL: shuffle_v8i16_75643120:
    231 ; SSE41:       # BB#0:
    232 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
    233 ; SSE41-NEXT:    retq
    234 ;
    235 ; AVX-LABEL: shuffle_v8i16_75643120:
    236 ; AVX:       # BB#0:
    237 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
    238 ; AVX-NEXT:    retq
    239   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
    240   ret <8 x i16> %shuffle
    241 }
    242 
    243 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
        ; Uses only words 0,1,4,5 of %a, arranged as 1,0,5,4,5,4,1,0. Plain SSE2
        ; is expected to first place the needed dwords with pshufd [0,2,2,0] and
        ; then swap words inside each half; SSSE3 and later are expected to use
        ; a single byte shuffle.
    244 ; SSE2-LABEL: shuffle_v8i16_10545410:
    245 ; SSE2:       # BB#0:
    246 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    247 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
    248 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
    249 ; SSE2-NEXT:    retq
    250 ;
    251 ; SSSE3-LABEL: shuffle_v8i16_10545410:
    252 ; SSSE3:       # BB#0:
    253 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
    254 ; SSSE3-NEXT:    retq
    255 ;
    256 ; SSE41-LABEL: shuffle_v8i16_10545410:
    257 ; SSE41:       # BB#0:
    258 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
    259 ; SSE41-NEXT:    retq
    260 ;
    261 ; AVX-LABEL: shuffle_v8i16_10545410:
    262 ; AVX:       # BB#0:
    263 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
    264 ; AVX-NEXT:    retq
    265   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
    266   ret <8 x i16> %shuffle
    267 }
    268 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
        ; Both halves of the result are words 5,4,1,0 of %a. Plain SSE2 is
        ; expected to place the needed dwords with pshufd [0,2,2,0] and then
        ; reorder words per half; SSSE3 and later are expected to use a single
        ; byte shuffle.
    269 ; SSE2-LABEL: shuffle_v8i16_54105410:
    270 ; SSE2:       # BB#0:
    271 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    272 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    273 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
    274 ; SSE2-NEXT:    retq
    275 ;
    276 ; SSSE3-LABEL: shuffle_v8i16_54105410:
    277 ; SSSE3:       # BB#0:
    278 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
    279 ; SSSE3-NEXT:    retq
    280 ;
    281 ; SSE41-LABEL: shuffle_v8i16_54105410:
    282 ; SSE41:       # BB#0:
    283 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
    284 ; SSE41-NEXT:    retq
    285 ;
    286 ; AVX-LABEL: shuffle_v8i16_54105410:
    287 ; AVX:       # BB#0:
    288 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
    289 ; AVX-NEXT:    retq
    290   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
    291   ret <8 x i16> %shuffle
    292 }
    293 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
        ; Low half is words 5,4,1,0 of %a and the high half is words 1,0,5,4.
        ; Plain SSE2 is expected to place the needed dwords with pshufd
        ; [0,2,2,0] and then reverse the words in each half; SSSE3 and later are
        ; expected to use a single byte shuffle.
    294 ; SSE2-LABEL: shuffle_v8i16_54101054:
    295 ; SSE2:       # BB#0:
    296 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    297 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    298 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    299 ; SSE2-NEXT:    retq
    300 ;
    301 ; SSSE3-LABEL: shuffle_v8i16_54101054:
    302 ; SSSE3:       # BB#0:
    303 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
    304 ; SSSE3-NEXT:    retq
    305 ;
    306 ; SSE41-LABEL: shuffle_v8i16_54101054:
    307 ; SSE41:       # BB#0:
    308 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
    309 ; SSE41-NEXT:    retq
    310 ;
    311 ; AVX-LABEL: shuffle_v8i16_54101054:
    312 ; AVX:       # BB#0:
    313 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
    314 ; AVX-NEXT:    retq
    315   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
    316   ret <8 x i16> %shuffle
    317 }
    318 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
        ; Uses only words 0 and 4 of %a; both halves of the result are 0,4,4,0.
        ; Plain SSE2 is expected to gather the two source dwords with pshufd
        ; [0,2,2,0] and then pick words per half; SSSE3 and later are expected
        ; to use a single byte shuffle.
    319 ; SSE2-LABEL: shuffle_v8i16_04400440:
    320 ; SSE2:       # BB#0:
    321 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    322 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    323 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
    324 ; SSE2-NEXT:    retq
    325 ;
    326 ; SSSE3-LABEL: shuffle_v8i16_04400440:
    327 ; SSSE3:       # BB#0:
    328 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
    329 ; SSSE3-NEXT:    retq
    330 ;
    331 ; SSE41-LABEL: shuffle_v8i16_04400440:
    332 ; SSE41:       # BB#0:
    333 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
    334 ; SSE41-NEXT:    retq
    335 ;
    336 ; AVX-LABEL: shuffle_v8i16_04400440:
    337 ; AVX:       # BB#0:
    338 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
    339 ; AVX-NEXT:    retq
    340   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
    341   ret <8 x i16> %shuffle
    342 }
    343 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
        ; Uses only words 0 and 4 of %a; both halves of the result are 4,0,0,4.
        ; Plain SSE2 is expected to gather the two source dwords with pshufd
        ; [0,2,2,0] and then pick words per half; SSSE3 and later are expected
        ; to use a single byte shuffle.
    344 ; SSE2-LABEL: shuffle_v8i16_40044004:
    345 ; SSE2:       # BB#0:
    346 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
    347 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
    348 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
    349 ; SSE2-NEXT:    retq
    350 ;
    351 ; SSSE3-LABEL: shuffle_v8i16_40044004:
    352 ; SSSE3:       # BB#0:
    353 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
    354 ; SSSE3-NEXT:    retq
    355 ;
    356 ; SSE41-LABEL: shuffle_v8i16_40044004:
    357 ; SSE41:       # BB#0:
    358 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
    359 ; SSE41-NEXT:    retq
    360 ;
    361 ; AVX-LABEL: shuffle_v8i16_40044004:
    362 ; AVX:       # BB#0:
    363 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
    364 ; AVX-NEXT:    retq
    365   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
    366   ret <8 x i16> %shuffle
    367 }
    368 
    369 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
        ; Full permutation of %a (each word used exactly once) in which both
        ; result halves draw from both source halves. Plain SSE2 is expected to
        ; need five word/dword shuffles; SSSE3 and later are expected to use a
        ; single byte shuffle.
    370 ; SSE2-LABEL: shuffle_v8i16_26405173:
    371 ; SSE2:       # BB#0:
    372 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
    373 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    374 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
    375 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
    376 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
    377 ; SSE2-NEXT:    retq
    378 ;
    379 ; SSSE3-LABEL: shuffle_v8i16_26405173:
    380 ; SSSE3:       # BB#0:
    381 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
    382 ; SSSE3-NEXT:    retq
    383 ;
    384 ; SSE41-LABEL: shuffle_v8i16_26405173:
    385 ; SSE41:       # BB#0:
    386 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
    387 ; SSE41-NEXT:    retq
    388 ;
    389 ; AVX-LABEL: shuffle_v8i16_26405173:
    390 ; AVX:       # BB#0:
    391 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
    392 ; AVX-NEXT:    retq
    393   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
    394   ret <8 x i16> %shuffle
    395 }
    396 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
        ; Full permutation of %a (each word used exactly once) in which both
        ; result halves draw from both source halves; the low half is 2,0,6,4.
        ; Plain SSE2 is expected to need five word/dword shuffles; SSSE3 and
        ; later are expected to use a single byte shuffle.
    397 ; SSE2-LABEL: shuffle_v8i16_20645173:
    398 ; SSE2:       # BB#0:
    399 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
    400 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    401 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
    402 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
    403 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
    404 ; SSE2-NEXT:    retq
    405 ;
    406 ; SSSE3-LABEL: shuffle_v8i16_20645173:
    407 ; SSSE3:       # BB#0:
    408 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
    409 ; SSSE3-NEXT:    retq
    410 ;
    411 ; SSE41-LABEL: shuffle_v8i16_20645173:
    412 ; SSE41:       # BB#0:
    413 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
    414 ; SSE41-NEXT:    retq
    415 ;
    416 ; AVX-LABEL: shuffle_v8i16_20645173:
    417 ; AVX:       # BB#0:
    418 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
    419 ; AVX-NEXT:    retq
    420   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
    421   ret <8 x i16> %shuffle
    422 }
    423 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
        ; Full permutation of %a (each word used exactly once); the low half is
        ; 2,6,4,0 and the high half is 1,3,7,5. Plain SSE2 is expected to need
        ; four word/dword shuffles; SSSE3 and later are expected to use a single
        ; byte shuffle.
    424 ; SSE2-LABEL: shuffle_v8i16_26401375:
    425 ; SSE2:       # BB#0:
    426 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
    427 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    428 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
    429 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
    430 ; SSE2-NEXT:    retq
    431 ;
    432 ; SSSE3-LABEL: shuffle_v8i16_26401375:
    433 ; SSSE3:       # BB#0:
    434 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
    435 ; SSSE3-NEXT:    retq
    436 ;
    437 ; SSE41-LABEL: shuffle_v8i16_26401375:
    438 ; SSE41:       # BB#0:
    439 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
    440 ; SSE41-NEXT:    retq
    441 ;
    442 ; AVX-LABEL: shuffle_v8i16_26401375:
    443 ; AVX:       # BB#0:
    444 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
    445 ; AVX-NEXT:    retq
    446   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
    447   ret <8 x i16> %shuffle
    448 }
    449 
    450 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
        ; Irregular single-input shuffle of %a: word 6 appears three times and
        ; words 0 and 2 are not used. Plain SSE2 is expected to need five
        ; word/dword shuffles; SSSE3 and later are expected to use a single byte
        ; shuffle.
    451 ; SSE2-LABEL: shuffle_v8i16_66751643:
    452 ; SSE2:       # BB#0:
    453 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
    454 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
    455 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
    456 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
    457 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
    458 ; SSE2-NEXT:    retq
    459 ;
    460 ; SSSE3-LABEL: shuffle_v8i16_66751643:
    461 ; SSSE3:       # BB#0:
    462 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
    463 ; SSSE3-NEXT:    retq
    464 ;
    465 ; SSE41-LABEL: shuffle_v8i16_66751643:
    466 ; SSE41:       # BB#0:
    467 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
    468 ; SSE41-NEXT:    retq
    469 ;
    470 ; AVX-LABEL: shuffle_v8i16_66751643:
    471 ; AVX:       # BB#0:
    472 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
    473 ; AVX-NEXT:    retq
    474   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
    475   ret <8 x i16> %shuffle
    476 }
    477 
    478 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
        ; Irregular single-input shuffle of %a. In this function the second
        ; shufflevector operand is undef rather than %b, so the %b parameter is
        ; unused. Plain SSE2 is expected to need four word/dword shuffles; SSSE3
        ; and later are expected to use a single byte shuffle.
    479 ; SSE2-LABEL: shuffle_v8i16_60514754:
    480 ; SSE2:       # BB#0:
    481 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    482 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    483 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
    484 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
    485 ; SSE2-NEXT:    retq
    486 ;
    487 ; SSSE3-LABEL: shuffle_v8i16_60514754:
    488 ; SSSE3:       # BB#0:
    489 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
    490 ; SSSE3-NEXT:    retq
    491 ;
    492 ; SSE41-LABEL: shuffle_v8i16_60514754:
    493 ; SSE41:       # BB#0:
    494 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
    495 ; SSE41-NEXT:    retq
    496 ;
    497 ; AVX-LABEL: shuffle_v8i16_60514754:
    498 ; AVX:       # BB#0:
    499 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
    500 ; AVX-NEXT:    retq
    501   %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
    502   ret <8 x i16> %shuffle
    503 }
    504 
    505 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
        ; Word 0 of %a in lanes 0-1 and word 4 in lanes 2-7. Plain SSE2 is
        ; expected to bring the dword holding word 4 into the low half with
        ; pshufd [0,2,2,3] and then splat per half; SSSE3 and later are expected
        ; to use a single byte shuffle.
    506 ; SSE2-LABEL: shuffle_v8i16_00444444:
    507 ; SSE2:       # BB#0:
    508 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    509 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
    510 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    511 ; SSE2-NEXT:    retq
    512 ;
    513 ; SSSE3-LABEL: shuffle_v8i16_00444444:
    514 ; SSSE3:       # BB#0:
    515 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
    516 ; SSSE3-NEXT:    retq
    517 ;
    518 ; SSE41-LABEL: shuffle_v8i16_00444444:
    519 ; SSE41:       # BB#0:
    520 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
    521 ; SSE41-NEXT:    retq
    522 ;
    523 ; AVX-LABEL: shuffle_v8i16_00444444:
    524 ; AVX:       # BB#0:
    525 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
    526 ; AVX-NEXT:    retq
    527   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    528   ret <8 x i16> %shuffle
    529 }
    530 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
        ; Word 4 of %a in lanes 0-1 and 4-7, word 0 in lanes 2-3. Plain SSE2 is
        ; expected to bring the dword holding word 4 into the low half with
        ; pshufd [0,2,2,3] and then pick words per half; SSSE3 and later are
        ; expected to use a single byte shuffle.
    531 ; SSE2-LABEL: shuffle_v8i16_44004444:
    532 ; SSE2:       # BB#0:
    533 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    534 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
    535 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    536 ; SSE2-NEXT:    retq
    537 ;
    538 ; SSSE3-LABEL: shuffle_v8i16_44004444:
    539 ; SSSE3:       # BB#0:
    540 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
    541 ; SSSE3-NEXT:    retq
    542 ;
    543 ; SSE41-LABEL: shuffle_v8i16_44004444:
    544 ; SSE41:       # BB#0:
    545 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
    546 ; SSE41-NEXT:    retq
    547 ;
    548 ; AVX-LABEL: shuffle_v8i16_44004444:
    549 ; AVX:       # BB#0:
    550 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
    551 ; AVX-NEXT:    retq
    552   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    553   ret <8 x i16> %shuffle
    554 }
    555 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
        ; Low half is words 0,4,4,0 of %a and the high half is a splat of word 4.
        ; Plain SSE2 is expected to bring the dword holding word 4 into the low
        ; half with pshufd [0,2,2,3] and then pick words per half; SSSE3 and
        ; later are expected to use a single byte shuffle.
    556 ; SSE2-LABEL: shuffle_v8i16_04404444:
    557 ; SSE2:       # BB#0:
    558 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    559 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    560 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    561 ; SSE2-NEXT:    retq
    562 ;
    563 ; SSSE3-LABEL: shuffle_v8i16_04404444:
    564 ; SSSE3:       # BB#0:
    565 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    566 ; SSSE3-NEXT:    retq
    567 ;
    568 ; SSE41-LABEL: shuffle_v8i16_04404444:
    569 ; SSE41:       # BB#0:
    570 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    571 ; SSE41-NEXT:    retq
    572 ;
    573 ; AVX-LABEL: shuffle_v8i16_04404444:
    574 ; AVX:       # BB#0:
    575 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    576 ; AVX-NEXT:    retq
    577   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
    578   ret <8 x i16> %shuffle
    579 }
    580 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
        ; Low half is words 0,4,4,0 of %a and the high half is a splat of word 0.
        ; Plain SSE2 is expected to arrange dwords with pshufd [0,2,0,3] so that
        ; word 0 is also available in the high half, then pick words per half;
        ; SSSE3 and later are expected to use a single byte shuffle.
    581 ; SSE2-LABEL: shuffle_v8i16_04400000:
    582 ; SSE2:       # BB#0:
    583 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
    584 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    585 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    586 ; SSE2-NEXT:    retq
    587 ;
    588 ; SSSE3-LABEL: shuffle_v8i16_04400000:
    589 ; SSSE3:       # BB#0:
    590 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
    591 ; SSSE3-NEXT:    retq
    592 ;
    593 ; SSE41-LABEL: shuffle_v8i16_04400000:
    594 ; SSE41:       # BB#0:
    595 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
    596 ; SSE41-NEXT:    retq
    597 ;
    598 ; AVX-LABEL: shuffle_v8i16_04400000:
    599 ; AVX:       # BB#0:
    600 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
    601 ; AVX-NEXT:    retq
    602   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
    603   ret <8 x i16> %shuffle
    604 }
    605 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
        ; Low half is words 0,4,4,0 of %a; the high half (4,5,6,7) is left in
        ; place. On every tested feature level the checks expect a dword shuffle
        ; [0,2,2,3] followed by a low-word shuffle, with no byte shuffle.
    606 ; SSE-LABEL: shuffle_v8i16_04404567:
    607 ; SSE:       # BB#0:
    608 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    609 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    610 ; SSE-NEXT:    retq
    611 ;
    612 ; AVX-LABEL: shuffle_v8i16_04404567:
    613 ; AVX:       # BB#0:
    614 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    615 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    616 ; AVX-NEXT:    retq
    617   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
    618   ret <8 x i16> %shuffle
    619 }
    620 
    621 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
        ; Word 0 of %a in lane 0, lane 1 undef, word 4 in lanes 2-7. In the
        ; expected byte-shuffle selectors the undef lane is filled with bytes
        ; 2,3 (word 1), i.e. lane 1 is left where it was. Plain SSE2 is expected
        ; to use pshufd + pshuflw + pshufhw.
    622 ; SSE2-LABEL: shuffle_v8i16_0X444444:
    623 ; SSE2:       # BB#0:
    624 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    625 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
    626 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    627 ; SSE2-NEXT:    retq
    628 ;
    629 ; SSSE3-LABEL: shuffle_v8i16_0X444444:
    630 ; SSSE3:       # BB#0:
    631 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
    632 ; SSSE3-NEXT:    retq
    633 ;
    634 ; SSE41-LABEL: shuffle_v8i16_0X444444:
    635 ; SSE41:       # BB#0:
    636 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
    637 ; SSE41-NEXT:    retq
    638 ;
    639 ; AVX-LABEL: shuffle_v8i16_0X444444:
    640 ; AVX:       # BB#0:
    641 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
    642 ; AVX-NEXT:    retq
    643   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    644   ret <8 x i16> %shuffle
    645 }
    646 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
        ; Word 4 of %a in lanes 0-1 and 4-7, lane 2 undef, word 0 in lane 3. In
        ; the expected selectors the undef lane is filled with word 4 (bytes
        ; 8,9 in the byte shuffles). Plain SSE2 is expected to use
        ; pshufd + pshuflw + pshufhw.
    647 ; SSE2-LABEL: shuffle_v8i16_44X04444:
    648 ; SSE2:       # BB#0:
    649 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    650 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
    651 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    652 ; SSE2-NEXT:    retq
    653 ;
    654 ; SSSE3-LABEL: shuffle_v8i16_44X04444:
    655 ; SSSE3:       # BB#0:
    656 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    657 ; SSSE3-NEXT:    retq
    658 ;
    659 ; SSE41-LABEL: shuffle_v8i16_44X04444:
    660 ; SSE41:       # BB#0:
    661 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    662 ; SSE41-NEXT:    retq
    663 ;
    664 ; AVX-LABEL: shuffle_v8i16_44X04444:
    665 ; AVX:       # BB#0:
    666 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    667 ; AVX-NEXT:    retq
    668   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
    669   ret <8 x i16> %shuffle
    670 }
    671 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
        ; Lane 0 undef, then words 4,4,0 of %a in lanes 1-3 and a splat of word 4
        ; in the high half. In the expected selectors the undef lane is filled
        ; with word 0 (bytes 0,1 in the byte shuffles). Plain SSE2 is expected
        ; to use pshufd + pshuflw + pshufhw.
    672 ; SSE2-LABEL: shuffle_v8i16_X4404444:
    673 ; SSE2:       # BB#0:
    674 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    675 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
    676 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    677 ; SSE2-NEXT:    retq
    678 ;
    679 ; SSSE3-LABEL: shuffle_v8i16_X4404444:
    680 ; SSSE3:       # BB#0:
    681 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    682 ; SSSE3-NEXT:    retq
    683 ;
    684 ; SSE41-LABEL: shuffle_v8i16_X4404444:
    685 ; SSE41:       # BB#0:
    686 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    687 ; SSE41-NEXT:    retq
    688 ;
    689 ; AVX-LABEL: shuffle_v8i16_X4404444:
    690 ; AVX:       # BB#0:
    691 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
    692 ; AVX-NEXT:    retq
    693   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
    694   ret <8 x i16> %shuffle
    695 }
    696 
    697 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input shuffle of %a where only the low four result lanes are defined:
        ; mask <0,1,2,7,undef,undef,undef,undef>.
        ; The SSE2 run expects pshufd/pshufhw/pshufd; SSSE3 and later expect a single
        ; pshufb / vpshufb.
    698 ; SSE2-LABEL: shuffle_v8i16_0127XXXX:
    699 ; SSE2:       # BB#0:
    700 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    701 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
    702 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    703 ; SSE2-NEXT:    retq
    704 ;
    705 ; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
    706 ; SSSE3:       # BB#0:
    707 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
    708 ; SSSE3-NEXT:    retq
    709 ;
    710 ; SSE41-LABEL: shuffle_v8i16_0127XXXX:
    711 ; SSE41:       # BB#0:
    712 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
    713 ; SSE41-NEXT:    retq
    714 ;
    715 ; AVX-LABEL: shuffle_v8i16_0127XXXX:
    716 ; AVX:       # BB#0:
    717 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
    718 ; AVX-NEXT:    retq
    719   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    720   ret <8 x i16> %shuffle
    721 }
    722 
    723 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input shuffle of %a where only the high four result lanes are defined:
        ; mask <undef,undef,undef,undef,4,5,6,3>.
        ; The SSE2 run expects pshufd/pshuflw/pshufd; SSSE3 and later expect a single
        ; pshufb / vpshufb.
    724 ; SSE2-LABEL: shuffle_v8i16_XXXX4563:
    725 ; SSE2:       # BB#0:
    726 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    727 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
    728 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
    729 ; SSE2-NEXT:    retq
    730 ;
    731 ; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
    732 ; SSSE3:       # BB#0:
    733 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
    734 ; SSSE3-NEXT:    retq
    735 ;
    736 ; SSE41-LABEL: shuffle_v8i16_XXXX4563:
    737 ; SSE41:       # BB#0:
    738 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
    739 ; SSE41-NEXT:    retq
    740 ;
    741 ; AVX-LABEL: shuffle_v8i16_XXXX4563:
    742 ; AVX:       # BB#0:
    743 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
    744 ; AVX-NEXT:    retq
    745   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
    746   ret <8 x i16> %shuffle
    747 }
    748 
    749 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input shuffle of %a with mask <4,5,6,3,undef,undef,undef,undef>:
        ; the same element pattern as the XXXX4563 test, but placed in the low four lanes.
        ; The SSE2 run expects pshufd/pshuflw/pshufd; SSSE3 and later expect a single
        ; pshufb / vpshufb.
    750 ; SSE2-LABEL: shuffle_v8i16_4563XXXX:
    751 ; SSE2:       # BB#0:
    752 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    753 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
    754 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
    755 ; SSE2-NEXT:    retq
    756 ;
    757 ; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
    758 ; SSSE3:       # BB#0:
    759 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
    760 ; SSSE3-NEXT:    retq
    761 ;
    762 ; SSE41-LABEL: shuffle_v8i16_4563XXXX:
    763 ; SSE41:       # BB#0:
    764 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
    765 ; SSE41-NEXT:    retq
    766 ;
    767 ; AVX-LABEL: shuffle_v8i16_4563XXXX:
    768 ; AVX:       # BB#0:
    769 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
    770 ; AVX-NEXT:    retq
    771   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    772   ret <8 x i16> %shuffle
    773 }
    774 
    775 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input shuffle of %a with mask <0,1,2,7,4,5,6,3>: the identity
        ; permutation with elements 3 and 7 exchanged (one element crosses each half).
        ; The SSE2 run expects pshufd/pshufhw/pshufd; SSSE3 and later expect a single
        ; pshufb / vpshufb.
    776 ; SSE2-LABEL: shuffle_v8i16_01274563:
    777 ; SSE2:       # BB#0:
    778 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    779 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    780 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
    781 ; SSE2-NEXT:    retq
    782 ;
    783 ; SSSE3-LABEL: shuffle_v8i16_01274563:
    784 ; SSSE3:       # BB#0:
    785 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
    786 ; SSSE3-NEXT:    retq
    787 ;
    788 ; SSE41-LABEL: shuffle_v8i16_01274563:
    789 ; SSE41:       # BB#0:
    790 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
    791 ; SSE41-NEXT:    retq
    792 ;
    793 ; AVX-LABEL: shuffle_v8i16_01274563:
    794 ; AVX:       # BB#0:
    795 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
    796 ; AVX-NEXT:    retq
    797   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
    798   ret <8 x i16> %shuffle
    799 }
    800 
    801 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input shuffle of %a with mask <4,5,6,3,0,1,2,7>: the two halves of
        ; the 01274563 pattern in swapped order.
        ; The SSE2 run expects pshufd/pshuflw/pshufd; SSSE3 and later expect a single
        ; pshufb / vpshufb.
    802 ; SSE2-LABEL: shuffle_v8i16_45630127:
    803 ; SSE2:       # BB#0:
    804 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    805 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
    806 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
    807 ; SSE2-NEXT:    retq
    808 ;
    809 ; SSSE3-LABEL: shuffle_v8i16_45630127:
    810 ; SSSE3:       # BB#0:
    811 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
    812 ; SSSE3-NEXT:    retq
    813 ;
    814 ; SSE41-LABEL: shuffle_v8i16_45630127:
    815 ; SSE41:       # BB#0:
    816 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
    817 ; SSE41-NEXT:    retq
    818 ;
    819 ; AVX-LABEL: shuffle_v8i16_45630127:
    820 ; AVX:       # BB#0:
    821 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
    822 ; AVX-NEXT:    retq
    823   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
    824   ret <8 x i16> %shuffle
    825 }
    826 
    827 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
        ; Single-input shuffle of %a with an irregular mask <3,7,1,0,2,7,3,5>
        ; (elements 3 and 7 are each used twice).
        ; The SSE2 run expects a six-instruction pshufhw/pshufd/pshuflw chain;
        ; SSSE3 and later expect a single pshufb / vpshufb.
    828 ; SSE2-LABEL: shuffle_v8i16_37102735:
    829 ; SSE2:       # BB#0:
    830 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
    831 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    832 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
    833 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    834 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    835 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
    836 ; SSE2-NEXT:    retq
    837 ;
    838 ; SSSE3-LABEL: shuffle_v8i16_37102735:
    839 ; SSSE3:       # BB#0:
    840 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
    841 ; SSSE3-NEXT:    retq
    842 ;
    843 ; SSE41-LABEL: shuffle_v8i16_37102735:
    844 ; SSE41:       # BB#0:
    845 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
    846 ; SSE41-NEXT:    retq
    847 ;
    848 ; AVX-LABEL: shuffle_v8i16_37102735:
    849 ; AVX:       # BB#0:
    850 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
    851 ; AVX-NEXT:    retq
    852   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
    853   ret <8 x i16> %shuffle
    854 }
    855 
    856 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <0,8,1,9,2,10,3,11>: interleaves the low four
        ; elements of %a with the low four of %b (indices 8..15 select from %b; the
        ; name spells indices in hex).
        ; All run configurations expect a single punpcklwd / vpunpcklwd.
    857 ; SSE-LABEL: shuffle_v8i16_08192a3b:
    858 ; SSE:       # BB#0:
    859 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    860 ; SSE-NEXT:    retq
    861 ;
    862 ; AVX-LABEL: shuffle_v8i16_08192a3b:
    863 ; AVX:       # BB#0:
    864 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    865 ; AVX-NEXT:    retq
    866   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    867   ret <8 x i16> %shuffle
    868 }
    869 
    870 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <0,12,1,13,2,14,3,15>: interleaves the low half of
        ; %a with the high half of %b.
        ; Expected lowering first swaps the 64-bit halves of xmm1 (pshufd [2,3,0,1])
        ; so that a plain punpcklwd can do the interleave.
    871 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
    872 ; SSE:       # BB#0:
    873 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
    874 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    875 ; SSE-NEXT:    retq
    876 ;
    877 ; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
    878 ; AVX:       # BB#0:
    879 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
    880 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    881 ; AVX-NEXT:    retq
    882   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
    883   ret <8 x i16> %shuffle
    884 }
    885 
    886 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <4,12,5,13,6,14,7,15>: interleaves the high four
        ; elements of %a with the high four of %b.
        ; All run configurations expect a single punpckhwd / vpunpckhwd.
    887 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
    888 ; SSE:       # BB#0:
    889 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    890 ; SSE-NEXT:    retq
    891 ;
    892 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
    893 ; AVX:       # BB#0:
    894 ; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    895 ; AVX-NEXT:    retq
    896   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    897   ret <8 x i16> %shuffle
    898 }
    899 
    900 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <4,8,5,9,6,10,7,11>: interleaves the high half of
        ; %a with the low half of %b.
        ; Expected lowering first swaps the 64-bit halves of xmm0 (pshufd [2,3,0,1])
        ; and then interleaves with punpcklwd.
    901 ; SSE-LABEL: shuffle_v8i16_48596a7b:
    902 ; SSE:       # BB#0:
    903 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    904 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    905 ; SSE-NEXT:    retq
    906 ;
    907 ; AVX-LABEL: shuffle_v8i16_48596a7b:
    908 ; AVX:       # BB#0:
    909 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    910 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    911 ; AVX-NEXT:    retq
    912   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
    913   ret <8 x i16> %shuffle
    914 }
    915 
    916 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <0,8,1,9,6,14,7,15>: interleaves elements 0,1,6,7
        ; of %a with the same-numbered elements of %b.
        ; Expected lowering applies the same pshufd [0,3,2,3] to both inputs, which
        ; packs the needed dwords into the low half, then interleaves with punpcklwd.
    917 ; SSE-LABEL: shuffle_v8i16_08196e7f:
    918 ; SSE:       # BB#0:
    919 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
    920 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
    921 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    922 ; SSE-NEXT:    retq
    923 ;
    924 ; AVX-LABEL: shuffle_v8i16_08196e7f:
    925 ; AVX:       # BB#0:
    926 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
    927 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
    928 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    929 ; AVX-NEXT:    retq
    930   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
    931   ret <8 x i16> %shuffle
    932 }
    933 
    934 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <0,12,1,13,6,8,7,9>: elements 0,1,6,7 of %a
        ; interleaved with elements 4,5,0,1 of %b (indices 12,13,8,9).
        ; Expected lowering uses a different pshufd per input (xmm1 [2,0,2,3],
        ; xmm0 [0,3,2,3]) to line the dwords up, then interleaves with punpcklwd.
    935 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
    936 ; SSE:       # BB#0:
    937 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
    938 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
    939 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    940 ; SSE-NEXT:    retq
    941 ;
    942 ; AVX-LABEL: shuffle_v8i16_0c1d6879:
    943 ; AVX:       # BB#0:
    944 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
    945 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
    946 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    947 ; AVX-NEXT:    retq
    948   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
    949   ret <8 x i16> %shuffle
    950 }
    951 
    952 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <1,0,9,8,3,2,11,10>: uses the low four elements of
        ; each input, emitted as swapped pairs alternating between %a and %b.
        ; Expected lowering interleaves first (punpcklwd) and then reorders within
        ; each half using pshuflw and pshufhw.
    953 ; SSE-LABEL: shuffle_v8i16_109832ba:
    954 ; SSE:       # BB#0:
    955 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    956 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
    957 ; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
    958 ; SSE-NEXT:    retq
    959 ;
    960 ; AVX-LABEL: shuffle_v8i16_109832ba:
    961 ; AVX:       # BB#0:
    962 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    963 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
    964 ; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
    965 ; AVX-NEXT:    retq
    966   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
    967   ret <8 x i16> %shuffle
    968 }
    969 
    970 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <8,0,9,1,10,2,11,3>: low-half interleave with %b's
        ; elements first (the operand-swapped form of the 08192a3b test).
        ; The SSE runs expect punpcklwd into xmm1 followed by a movdqa copy to xmm0;
        ; the AVX runs expect one vpunpcklwd that writes xmm0 directly.
    971 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
    972 ; SSE:       # BB#0:
    973 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    974 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    975 ; SSE-NEXT:    retq
    976 ;
    977 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
    978 ; AVX:       # BB#0:
    979 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    980 ; AVX-NEXT:    retq
    981   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
    982   ret <8 x i16> %shuffle
    983 }
    984 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <12,4,13,5,14,6,15,7>: high-half interleave with
        ; %b's elements first (the operand-swapped form of the 4c5d6e7f test).
        ; The SSE runs expect punpckhwd into xmm1 followed by a movdqa copy to xmm0;
        ; the AVX runs expect one vpunpckhwd that writes xmm0 directly.
    985 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
    986 ; SSE:       # BB#0:
    987 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    988 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    989 ; SSE-NEXT:    retq
    990 ;
    991 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
    992 ; AVX:       # BB#0:
    993 ; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    994 ; AVX-NEXT:    retq
    995   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
    996   ret <8 x i16> %shuffle
    997 }
    998 
    999 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle, mask <0,2,1,3,12,14,13,15>: the low result half is %a's
        ; low half with its middle two elements swapped; the high result half is
        ; %b's high half with the same swap.
        ; Expected lowering shuffles each input separately (pshuflw on xmm0; pshufd
        ; then pshuflw on xmm1) and joins the two low quadwords with punpcklqdq.
   1000 ; SSE-LABEL: shuffle_v8i16_0213cedf:
   1001 ; SSE:       # BB#0:
   1002 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1003 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
   1004 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
   1005 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1006 ; SSE-NEXT:    retq
   1007 ;
   1008 ; AVX-LABEL: shuffle_v8i16_0213cedf:
   1009 ; AVX:       # BB#0:
   1010 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   1011 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
   1012 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
   1013 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1014 ; AVX-NEXT:    retq
   1015   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
   1016   ret <8 x i16> %shuffle
   1017 }
   1018 
   1019 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle with only the low four lanes defined:
        ; mask <4,4,3,10,undef,undef,undef,undef> (index 10 = element 2 of %b).
        ; Expected lowering per run: SSE2 blends through a constant mask
        ; (pand/pandn/por) then pshufd/pshuflw; SSSE3 uses two zeroing pshufb plus por;
        ; SSE4.1 and AVX blend with pblendw / vpblendw then pshufd/pshuflw.
   1020 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
   1021 ; SSE2:       # BB#0:
   1022 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
   1023 ; SSE2-NEXT:    pand %xmm2, %xmm0
   1024 ; SSE2-NEXT:    pandn %xmm1, %xmm2
   1025 ; SSE2-NEXT:    por %xmm0, %xmm2
   1026 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
   1027 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1028 ; SSE2-NEXT:    retq
   1029 ;
   1030 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
   1031 ; SSSE3:       # BB#0:
   1032 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
   1033 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
   1034 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1035 ; SSSE3-NEXT:    retq
   1036 ;
   1037 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
   1038 ; SSE41:       # BB#0:
   1039 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1040 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1041 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1042 ; SSE41-NEXT:    retq
   1043 ;
   1044 ; AVX-LABEL: shuffle_v8i16_443aXXXX:
   1045 ; AVX:       # BB#0:
   1046 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   1047 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1048 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   1049 ; AVX-NEXT:    retq
   1050   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
   1051   ret <8 x i16> %shuffle
   1052 }
   1053 
   1054 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle with only the low four lanes defined:
        ; mask <0,3,2,13,undef,undef,undef,undef> (index 13 = element 5 of %b).
        ; Expected lowering per run: SSE2 merges halves with movsd, then a
        ; pshufd/pshufhw/pshufd/pshuflw chain; SSSE3 uses two zeroing pshufb plus por;
        ; SSE4.1 and AVX1 use pblendw + pshufb; AVX2 differs only in blending with
        ; vpblendd, which is why AVX1 and AVX2 have separate check prefixes here.
   1055 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
   1056 ; SSE2:       # BB#0:
   1057 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1058 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
   1059 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
   1060 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   1061 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
   1062 ; SSE2-NEXT:    retq
   1063 ;
   1064 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
   1065 ; SSSE3:       # BB#0:
   1066 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
   1067 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
   1068 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1069 ; SSSE3-NEXT:    retq
   1070 ;
   1071 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
   1072 ; SSE41:       # BB#0:
   1073 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1074 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   1075 ; SSE41-NEXT:    retq
   1076 ;
   1077 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
   1078 ; AVX1:       # BB#0:
   1079 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1080 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   1081 ; AVX1-NEXT:    retq
   1082 ;
   1083 ; AVX2-LABEL: shuffle_v8i16_032dXXXX:
   1084 ; AVX2:       # BB#0:
   1085 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1086 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   1087 ; AVX2-NEXT:    retq
   1088   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   1089   ret <8 x i16> %shuffle
   1090 }
   1091 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Only result lane 3 is defined, and it reads index 13 (element 5 of %b);
        ; every other lane is undef.
        ; All run configurations expect a single pshufd / vpshufd of xmm1 with
        ; [2,2,3,3], which places %b's element 5 in lane 3.
   1092 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
   1093 ; SSE:       # BB#0:
   1094 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
   1095 ; SSE-NEXT:    retq
   1096 ;
   1097 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
   1098 ; AVX:       # BB#0:
   1099 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
   1100 ; AVX-NEXT:    retq
   1101   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   1102   ret <8 x i16> %shuffle
   1103 }
   1104 
   1105 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle with only the low four lanes defined:
        ; mask <0,1,2,13,undef,undef,undef,undef>, i.e. %a's lanes 0-2 kept in place
        ; and lane 3 taken from element 5 of %b.
        ; Expected lowering per run: SSE2 moves %b's element with pshufd and blends
        ; via a constant mask (pand/pandn/por); SSSE3 uses two zeroing pshufb plus por;
        ; SSE4.1 and AVX use pshufd followed by pblendw / vpblendw.
   1106 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
   1107 ; SSE2:       # BB#0:
   1108 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
   1109 ; SSE2-NEXT:    pand %xmm2, %xmm0
   1110 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   1111 ; SSE2-NEXT:    pandn %xmm1, %xmm2
   1112 ; SSE2-NEXT:    por %xmm2, %xmm0
   1113 ; SSE2-NEXT:    retq
   1114 ;
   1115 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
   1116 ; SSSE3:       # BB#0:
   1117 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
   1118 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
   1119 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1120 ; SSSE3-NEXT:    retq
   1121 ;
   1122 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
   1123 ; SSE41:       # BB#0:
   1124 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   1125 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   1126 ; SSE41-NEXT:    retq
   1127 ;
   1128 ; AVX-LABEL: shuffle_v8i16_012dXXXX:
   1129 ; AVX:       # BB#0:
   1130 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   1131 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   1132 ; AVX-NEXT:    retq
   1133   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   1134   ret <8 x i16> %shuffle
   1135 }
   1136 
   1137 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle with only the high four lanes defined:
        ; mask <undef,undef,undef,undef,12,13,14,3>, i.e. %b's lanes 4-6 kept in
        ; place and lane 7 taken from element 3 of %a.
        ; Expected lowering per run: SSE2 uses pshufd + constant-mask blend
        ; (pand/pandn/por) + movdqa; SSSE3 uses two zeroing pshufb plus por; SSE4.1 and
        ; AVX1 use pshufd + pblendw; AVX2 replaces the vpshufd with vpbroadcastq.
   1138 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
   1139 ; SSE2:       # BB#0:
   1140 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
   1141 ; SSE2-NEXT:    pand %xmm2, %xmm1
   1142 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1143 ; SSE2-NEXT:    pandn %xmm0, %xmm2
   1144 ; SSE2-NEXT:    por %xmm1, %xmm2
   1145 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
   1146 ; SSE2-NEXT:    retq
   1147 ;
   1148 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
   1149 ; SSSE3:       # BB#0:
   1150 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
   1151 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
   1152 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1153 ; SSSE3-NEXT:    retq
   1154 ;
   1155 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
   1156 ; SSE41:       # BB#0:
   1157 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1158 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   1159 ; SSE41-NEXT:    retq
   1160 ;
   1161 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
   1162 ; AVX1:       # BB#0:
   1163 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1164 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   1165 ; AVX1-NEXT:    retq
   1166 ;
   1167 ; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
   1168 ; AVX2:       # BB#0:
   1169 ; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
   1170 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   1171 ; AVX2-NEXT:    retq
   1172   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
   1173   ret <8 x i16> %shuffle
   1174 }
   1175 
   1176 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle with only the low four lanes defined:
        ; mask <12,13,14,3,undef,undef,undef,undef>, i.e. %b's elements 4-6 moved to
        ; lanes 0-2 and %a's element 3 left in lane 3.
        ; Expected lowering per run: SSE2 swaps xmm1's halves with pshufd, blends via
        ; a constant mask (pand/pandn/por) and copies with movdqa; SSSE3 uses two
        ; zeroing pshufb plus por; SSE4.1 and AVX use pshufd + pblendw / vpblendw.
   1177 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
   1178 ; SSE2:       # BB#0:
   1179 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
   1180 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1181 ; SSE2-NEXT:    pand %xmm2, %xmm1
   1182 ; SSE2-NEXT:    pandn %xmm0, %xmm2
   1183 ; SSE2-NEXT:    por %xmm1, %xmm2
   1184 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
   1185 ; SSE2-NEXT:    retq
   1186 ;
   1187 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
   1188 ; SSSE3:       # BB#0:
   1189 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
   1190 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
   1191 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1192 ; SSSE3-NEXT:    retq
   1193 ;
   1194 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
   1195 ; SSE41:       # BB#0:
   1196 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1197 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   1198 ; SSE41-NEXT:    retq
   1199 ;
   1200 ; AVX-LABEL: shuffle_v8i16_cde3XXXX:
   1201 ; AVX:       # BB#0:
   1202 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1203 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   1204 ; AVX-NEXT:    retq
   1205   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   1206   ret <8 x i16> %shuffle
   1207 }
   1208 
   1209 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
        ; Fully defined two-input shuffle, mask <0,1,2,13,12,13,14,3>: combines the
        ; 012dXXXX and XXXXcde3 patterns (element 13 of the concatenation is used twice).
        ; Expected lowering per run: SSE2 needs movsd plus seven
        ; pshufd/pshuflw/pshufhw steps; SSSE3 uses two zeroing pshufb plus por;
        ; SSE4.1 and AVX1 use pblendw + pshufb; AVX2 blends with vpblendd instead.
   1210 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
   1211 ; SSE2:       # BB#0:
   1212 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1213 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
   1214 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   1215 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
   1216 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
   1217 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
   1218 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
   1219 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
   1220 ; SSE2-NEXT:    retq
   1221 ;
   1222 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
   1223 ; SSSE3:       # BB#0:
   1224 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
   1225 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
   1226 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1227 ; SSSE3-NEXT:    retq
   1228 ;
   1229 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
   1230 ; SSE41:       # BB#0:
   1231 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1232 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   1233 ; SSE41-NEXT:    retq
   1234 ;
   1235 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
   1236 ; AVX1:       # BB#0:
   1237 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1238 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   1239 ; AVX1-NEXT:    retq
   1240 ;
   1241 ; AVX2-LABEL: shuffle_v8i16_012dcde3:
   1242 ; AVX2:       # BB#0:
   1243 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1244 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   1245 ; AVX2-NEXT:    retq
   1246   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
   1247   ret <8 x i16> %shuffle
   1248 }
   1249 
   1250 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
        ; Pure lane-wise blend, mask <0,9,2,3,12,13,14,7>: every result lane i takes
        ; lane i of either %a (lanes 0,2,3,7) or %b (lanes 1,4,5,6), so no element moves.
        ; The SSE2 and SSSE3 runs expect a constant mask with andps/andnps/orps;
        ; SSE4.1 and AVX expect a single pblendw / vpblendw.
   1251 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
   1252 ; SSE2:       # BB#0:
   1253 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
   1254 ; SSE2-NEXT:    andps %xmm2, %xmm0
   1255 ; SSE2-NEXT:    andnps %xmm1, %xmm2
   1256 ; SSE2-NEXT:    orps %xmm2, %xmm0
   1257 ; SSE2-NEXT:    retq
   1258 ;
   1259 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
   1260 ; SSSE3:       # BB#0:
   1261 ; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
   1262 ; SSSE3-NEXT:    andps %xmm2, %xmm0
   1263 ; SSSE3-NEXT:    andnps %xmm1, %xmm2
   1264 ; SSSE3-NEXT:    orps %xmm2, %xmm0
   1265 ; SSSE3-NEXT:    retq
   1266 ;
   1267 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
   1268 ; SSE41:       # BB#0:
   1269 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   1270 ; SSE41-NEXT:    retq
   1271 ;
   1272 ; AVX-LABEL: shuffle_v8i16_0923cde7:
   1273 ; AVX:       # BB#0:
   1274 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   1275 ; AVX-NEXT:    retq
   1276   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
   1277   ret <8 x i16> %shuffle
   1278 }
   1279 
   1280 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
        ; Sparse two-input shuffle, mask <undef,undef,undef,1,undef,5,7,9>: lanes
        ; 3, 5 and 6 read %a (elements 1, 5, 7) and lane 7 reads element 1 of %b.
        ; Expected lowering per run: SSE2 uses pshufd on %b, pshuflw/pshufhw on %a,
        ; then a constant-mask blend (pand/pandn/por) and movdqa; SSSE3 uses two
        ; zeroing pshufb plus por; SSE4.1 and AVX1 end with pblendw instead of the
        ; mask blend; AVX2 additionally replaces the vpshufd with vpbroadcastd.
   1281 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
   1282 ; SSE2:       # BB#0:
   1283 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
   1284 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
   1285 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1286 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1287 ; SSE2-NEXT:    pand %xmm1, %xmm0
   1288 ; SSE2-NEXT:    pandn %xmm2, %xmm1
   1289 ; SSE2-NEXT:    por %xmm0, %xmm1
   1290 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1291 ; SSE2-NEXT:    retq
   1292 ;
   1293 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
   1294 ; SSSE3:       # BB#0:
   1295 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
   1296 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
   1297 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1298 ; SSSE3-NEXT:    retq
   1299 ;
   1300 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
   1301 ; SSE41:       # BB#0:
   1302 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   1303 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1304 ; SSE41-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1305 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1306 ; SSE41-NEXT:    retq
   1307 ;
   1308 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
   1309 ; AVX1:       # BB#0:
   1310 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   1311 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1312 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1313 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1314 ; AVX1-NEXT:    retq
   1315 ;
   1316 ; AVX2-LABEL: shuffle_v8i16_XXX1X579:
   1317 ; AVX2:       # BB#0:
   1318 ; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
   1319 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1320 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   1321 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1322 ; AVX2-NEXT:    retq
   1323   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
   1324   ret <8 x i16> %shuffle
   1325 }
   1326 
   1327 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { ; Two-input shuffle with undef lanes; the name spells the mask (X = undef, hex digits 8 and up = lanes of %b).
   1328 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
   1329 ; SSE2:       # BB#0:
   1330 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
   1331 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
   1332 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
   1333 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
   1334 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
   1335 ; SSE2-NEXT:    retq
   1336 ;
   1337 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
   1338 ; SSSE3:       # BB#0:
   1339 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
   1340 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
   1341 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1342 ; SSSE3-NEXT:    retq
   1343 ;
   1344 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
   1345 ; SSE41:       # BB#0:
   1346 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   1347 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1348 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1349 ; SSE41-NEXT:    retq
   1350 ;
   1351 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
   1352 ; AVX1:       # BB#0:
   1353 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   1354 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1355 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   1356 ; AVX1-NEXT:    retq
   1357 ;
   1358 ; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
   1359 ; AVX2:       # BB#0:
   1360 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   1361 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1362 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1363 ; AVX2-NEXT:    retq
   1364   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef> ; lane 2 = %a[4]; lanes 4-6 = %b[0], %b[2], %b[4]; remaining lanes are undef
   1365   ret <8 x i16> %shuffle
   1366 }
   1367 
   1368 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { ; Scalar %i in lane 0, zeros elsewhere; the checks expect a zero-extending movzwl plus movd/vmovd rather than an insert.
   1369 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
   1370 ; SSE:       # BB#0:
   1371 ; SSE-NEXT:    movzwl %di, %eax
   1372 ; SSE-NEXT:    movd %eax, %xmm0
   1373 ; SSE-NEXT:    retq
   1374 ;
   1375 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
   1376 ; AVX:       # BB#0:
   1377 ; AVX-NEXT:    movzwl %di, %eax
   1378 ; AVX-NEXT:    vmovd %eax, %xmm0
   1379 ; AVX-NEXT:    retq
   1380   %a = insertelement <8 x i16> undef, i16 %i, i32 0 ; %i goes in lane 0 of %a; the other lanes of %a are undef
   1381   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; index 8 = %a[0]; indices 1-7 read the zero vector
   1382   ret <8 x i16> %shuffle
   1383 }
   1384 
   1385 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { ; Scalar %i in lane 1, zeros elsewhere; the checks expect a register zeroing (pxor/vpxor) followed by pinsrw/vpinsrw $1.
   1386 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
   1387 ; SSE:       # BB#0:
   1388 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1389 ; SSE-NEXT:    pinsrw $1, %edi, %xmm0
   1390 ; SSE-NEXT:    retq
   1391 ;
   1392 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
   1393 ; AVX:       # BB#0:
   1394 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1395 ; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
   1396 ; AVX-NEXT:    retq
   1397   %a = insertelement <8 x i16> undef, i16 %i, i32 0 ; %i goes in lane 0 of %a; the other lanes of %a are undef
   1398   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3> ; index 8 = %a[0]; every index in 0-7 reads the zero vector, so which of them is used does not change the result
   1399   ret <8 x i16> %shuffle
   1400 }
   1401 
   1402 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { ; Scalar %i in lane 5, zeros elsewhere; the checks expect a register zeroing (pxor/vpxor) followed by pinsrw/vpinsrw $5.
   1403 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
   1404 ; SSE:       # BB#0:
   1405 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1406 ; SSE-NEXT:    pinsrw $5, %edi, %xmm0
   1407 ; SSE-NEXT:    retq
   1408 ;
   1409 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
   1410 ; AVX:       # BB#0:
   1411 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1412 ; AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
   1413 ; AVX-NEXT:    retq
   1414   %a = insertelement <8 x i16> undef, i16 %i, i32 0 ; %i goes in lane 0 of %a; the other lanes of %a are undef
   1415   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0> ; index 8 = %a[0] lands in lane 5; index 0 reads lane 0 of the zero vector
   1416   ret <8 x i16> %shuffle
   1417 }
   1418 
   1419 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { ; Scalar %i in lane 7 with a mix of zero and undef lanes below it; the checks expect a register zeroing (pxor/vpxor) followed by pinsrw/vpinsrw $7.
   1420 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
   1421 ; SSE:       # BB#0:
   1422 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1423 ; SSE-NEXT:    pinsrw $7, %edi, %xmm0
   1424 ; SSE-NEXT:    retq
   1425 ;
   1426 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
   1427 ; AVX:       # BB#0:
   1428 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1429 ; AVX-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0
   1430 ; AVX-NEXT:    retq
   1431   %a = insertelement <8 x i16> undef, i16 %i, i32 0 ; %i goes in lane 0 of %a; the other lanes of %a are undef
   1432   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8> ; lanes 0, 3, 6 read the zero vector; lane 7 = %a[0]; lanes 1, 2, 4, 5 are undef
   1433   ret <8 x i16> %shuffle
   1434 }
   1435 
   1436 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { ; Scalar taken from a non-zero lane of the second operand and placed in lane 2 of a zero vector; the checks expect a register zeroing (pxor/vpxor) followed by pinsrw/vpinsrw $2.
   1437 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
   1438 ; SSE:       # BB#0:
   1439 ; SSE-NEXT:    pxor %xmm0, %xmm0
   1440 ; SSE-NEXT:    pinsrw $2, %edi, %xmm0
   1441 ; SSE-NEXT:    retq
   1442 ;
   1443 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
   1444 ; AVX:       # BB#0:
   1445 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1446 ; AVX-NEXT:    vpinsrw $2, %edi, %xmm0, %xmm0
   1447 ; AVX-NEXT:    retq
   1448   %a = insertelement <8 x i16> undef, i16 %i, i32 3 ; %i goes in lane 3 of %a (not lane 0); the other lanes of %a are undef
   1449   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7> ; index 11 = %a[3] lands in lane 2; every other index reads the zero vector
   1450   ret <8 x i16> %shuffle
   1451 }
   1452 
   1453 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { ; Rotation across both inputs: result = %b[5..7] followed by %a[0..4].
   1454 ; SSE2-LABEL: shuffle_v8i16_def01234:
   1455 ; SSE2:       # BB#0:
   1456 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1457 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1458 ; SSE2-NEXT:    por %xmm1, %xmm0
   1459 ; SSE2-NEXT:    retq
   1460 ;
   1461 ; SSSE3-LABEL: shuffle_v8i16_def01234:
   1462 ; SSSE3:       # BB#0:
   1463 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1464 ; SSSE3-NEXT:    retq
   1465 ;
   1466 ; SSE41-LABEL: shuffle_v8i16_def01234:
   1467 ; SSE41:       # BB#0:
   1468 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1469 ; SSE41-NEXT:    retq
   1470 ;
   1471 ; AVX-LABEL: shuffle_v8i16_def01234:
   1472 ; AVX:       # BB#0:
   1473 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1474 ; AVX-NEXT:    retq
   1475   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> ; indices 8-15 select from %b; the SSE2 checks expect two byte shifts and a por, the other runs a single palignr/vpalignr
   1476   ret <8 x i16> %shuffle
   1477 }
   1478 
   1479 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { ; Partially-undef two-input mask that is compatible with the rotation %b[5..7],%a[0..4]; the checks expect the same byte shift / palignr sequences as a fully-defined rotation.
   1480 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
   1481 ; SSE2:       # BB#0:
   1482 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1483 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1484 ; SSE2-NEXT:    por %xmm1, %xmm0
   1485 ; SSE2-NEXT:    retq
   1486 ;
   1487 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
   1488 ; SSSE3:       # BB#0:
   1489 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1490 ; SSSE3-NEXT:    retq
   1491 ;
   1492 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
   1493 ; SSE41:       # BB#0:
   1494 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1495 ; SSE41-NEXT:    retq
   1496 ;
   1497 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
   1498 ; AVX:       # BB#0:
   1499 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   1500 ; AVX-NEXT:    retq
   1501   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> ; lane 1 = %b[6]; lanes 4-6 = %a[1..3]; remaining lanes are undef
   1502   ret <8 x i16> %shuffle
   1503 }
   1504 
   1505 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { ; Single-input rotation: result = %a[5..7] followed by %a[0..4]; %b is not referenced by the mask.
   1506 ; SSE2-LABEL: shuffle_v8i16_56701234:
   1507 ; SSE2:       # BB#0:
   1508 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1509 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1510 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1511 ; SSE2-NEXT:    por %xmm1, %xmm0
   1512 ; SSE2-NEXT:    retq
   1513 ;
   1514 ; SSSE3-LABEL: shuffle_v8i16_56701234:
   1515 ; SSSE3:       # BB#0:
   1516 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1517 ; SSSE3-NEXT:    retq
   1518 ;
   1519 ; SSE41-LABEL: shuffle_v8i16_56701234:
   1520 ; SSE41:       # BB#0:
   1521 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1522 ; SSE41-NEXT:    retq
   1523 ;
   1524 ; AVX-LABEL: shuffle_v8i16_56701234:
   1525 ; AVX:       # BB#0:
   1526 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1527 ; AVX-NEXT:    retq
   1528   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4> ; all indices are below 8, so only %a is read; the SSE2 checks copy xmm0 first because both shifted halves come from it
   1529   ret <8 x i16> %shuffle
   1530 }
   1531 
   1532 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { ; Partially-undef single-input mask that is compatible with the rotation %a[5..7],%a[0..4]; %b is not referenced by the mask.
   1533 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
   1534 ; SSE2:       # BB#0:
   1535 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1536 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1537 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1538 ; SSE2-NEXT:    por %xmm1, %xmm0
   1539 ; SSE2-NEXT:    retq
   1540 ;
   1541 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
   1542 ; SSSE3:       # BB#0:
   1543 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1544 ; SSSE3-NEXT:    retq
   1545 ;
   1546 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
   1547 ; SSE41:       # BB#0:
   1548 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1549 ; SSE41-NEXT:    retq
   1550 ;
   1551 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
   1552 ; AVX:       # BB#0:
   1553 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   1554 ; AVX-NEXT:    retq
   1555   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> ; lane 1 = %a[6]; lanes 4-6 = %a[1..3]; remaining lanes are undef
   1556   ret <8 x i16> %shuffle
   1557 }
   1558 
   1559 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { ; Only lanes 4-6 are defined and they come from %a, so the checks expect a single whole-register byte shift (pslldq/vpslldq) with no second operand.
   1560 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
   1561 ; SSE:       # BB#0:
   1562 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1563 ; SSE-NEXT:    retq
   1564 ;
   1565 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
   1566 ; AVX:       # BB#0:
   1567 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   1568 ; AVX-NEXT:    retq
   1569   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> ; lanes 4-6 = %a[1..3]; a left shift by 6 bytes (three i16 lanes) puts them there
   1570   ret <8 x i16> %shuffle
   1571 }
   1572 
   1573 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { ; Rotation across both inputs: result = %b[3..7] followed by %a[0..2].
   1574 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
   1575 ; SSE2:       # BB#0:
   1576 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1577 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1578 ; SSE2-NEXT:    por %xmm1, %xmm0
   1579 ; SSE2-NEXT:    retq
   1580 ;
   1581 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
   1582 ; SSSE3:       # BB#0:
   1583 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1584 ; SSSE3-NEXT:    retq
   1585 ;
   1586 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
   1587 ; SSE41:       # BB#0:
   1588 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1589 ; SSE41-NEXT:    retq
   1590 ;
   1591 ; AVX-LABEL: shuffle_v8i16_bcdef012:
   1592 ; AVX:       # BB#0:
   1593 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1594 ; AVX-NEXT:    retq
   1595   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2> ; indices 8-15 select from %b; the SSE2 checks expect two byte shifts and a por, the other runs a single palignr/vpalignr
   1596   ret <8 x i16> %shuffle
   1597 }
   1598 
   1599 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { ; Partially-undef two-input mask that is compatible with the rotation %b[3..7],%a[0..2].
   1600 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
   1601 ; SSE2:       # BB#0:
   1602 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1603 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1604 ; SSE2-NEXT:    por %xmm1, %xmm0
   1605 ; SSE2-NEXT:    retq
   1606 ;
   1607 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
   1608 ; SSSE3:       # BB#0:
   1609 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1610 ; SSSE3-NEXT:    retq
   1611 ;
   1612 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
   1613 ; SSE41:       # BB#0:
   1614 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1615 ; SSE41-NEXT:    retq
   1616 ;
   1617 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
   1618 ; AVX:       # BB#0:
   1619 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   1620 ; AVX-NEXT:    retq
   1621   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef> ; lanes 1-3 = %b[4..6]; lane 6 = %a[1]; remaining lanes are undef
   1622   ret <8 x i16> %shuffle
   1623 }
   1624 
   1625 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { ; Single-input rotation: result = %a[3..7] followed by %a[0..2]; %b is not referenced by the mask.
   1626 ; SSE2-LABEL: shuffle_v8i16_34567012:
   1627 ; SSE2:       # BB#0:
   1628 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1629 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1630 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1631 ; SSE2-NEXT:    por %xmm1, %xmm0
   1632 ; SSE2-NEXT:    retq
   1633 ;
   1634 ; SSSE3-LABEL: shuffle_v8i16_34567012:
   1635 ; SSSE3:       # BB#0:
   1636 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1637 ; SSSE3-NEXT:    retq
   1638 ;
   1639 ; SSE41-LABEL: shuffle_v8i16_34567012:
   1640 ; SSE41:       # BB#0:
   1641 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1642 ; SSE41-NEXT:    retq
   1643 ;
   1644 ; AVX-LABEL: shuffle_v8i16_34567012:
   1645 ; AVX:       # BB#0:
   1646 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1647 ; AVX-NEXT:    retq
   1648   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2> ; all indices are below 8, so only %a is read; the SSE2 checks copy xmm0 first because both shifted halves come from it
   1649   ret <8 x i16> %shuffle
   1650 }
   1651 
   1652 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { ; Partially-undef single-input mask that is compatible with the rotation %a[3..7],%a[0..2]; %b is not referenced by the mask.
   1653 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
   1654 ; SSE2:       # BB#0:
   1655 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1656 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1657 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
   1658 ; SSE2-NEXT:    por %xmm1, %xmm0
   1659 ; SSE2-NEXT:    retq
   1660 ;
   1661 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
   1662 ; SSSE3:       # BB#0:
   1663 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1664 ; SSSE3-NEXT:    retq
   1665 ;
   1666 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
   1667 ; SSE41:       # BB#0:
   1668 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1669 ; SSE41-NEXT:    retq
   1670 ;
   1671 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
   1672 ; AVX:       # BB#0:
   1673 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   1674 ; AVX-NEXT:    retq
   1675   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef> ; lanes 1-3 = %a[4..6]; lane 6 = %a[1]; remaining lanes are undef
   1676   ret <8 x i16> %shuffle
   1677 }
   1678 
   1679 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) { ; Only lanes 1-3 are defined and they come from %a, so the checks expect a single whole-register byte shift (psrldq/vpsrldq) with no second operand.
   1680 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
   1681 ; SSE:       # BB#0:
   1682 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1683 ; SSE-NEXT:    retq
   1684 ;
   1685 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
   1686 ; AVX:       # BB#0:
   1687 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1688 ; AVX-NEXT:    retq
   1689   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 1-3 = %a[4..6]; a right shift by 6 bytes (three i16 lanes) puts them there
   1690   ret <8 x i16> %shuffle
   1691 }
   1692 
   1693 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { ; Rotation across both inputs with %a first: result = %a[3..7] followed by %b[0..2].
   1694 ; SSE2-LABEL: shuffle_v8i16_3456789a:
   1695 ; SSE2:       # BB#0:
   1696 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1697 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
   1698 ; SSE2-NEXT:    por %xmm1, %xmm0
   1699 ; SSE2-NEXT:    retq
   1700 ;
   1701 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
   1702 ; SSSE3:       # BB#0:
   1703 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1704 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1705 ; SSSE3-NEXT:    retq
   1706 ;
   1707 ; SSE41-LABEL: shuffle_v8i16_3456789a:
   1708 ; SSE41:       # BB#0:
   1709 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1710 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1711 ; SSE41-NEXT:    retq
   1712 ;
   1713 ; AVX-LABEL: shuffle_v8i16_3456789a:
   1714 ; AVX:       # BB#0:
   1715 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1716 ; AVX-NEXT:    retq
   1717   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> ; indices 8-10 = %b[0..2]; in the non-AVX palignr checks the result is produced in xmm1 and then copied to xmm0 with movdqa
   1718   ret <8 x i16> %shuffle
   1719 }
   1720 
   1721 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { ; Partially-undef two-input mask that is compatible with the rotation %a[3..7],%b[0..2].
   1722 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
   1723 ; SSE2:       # BB#0:
   1724 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1725 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
   1726 ; SSE2-NEXT:    por %xmm1, %xmm0
   1727 ; SSE2-NEXT:    retq
   1728 ;
   1729 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
   1730 ; SSSE3:       # BB#0:
   1731 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1732 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1733 ; SSSE3-NEXT:    retq
   1734 ;
   1735 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
   1736 ; SSE41:       # BB#0:
   1737 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1738 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1739 ; SSE41-NEXT:    retq
   1740 ;
   1741 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
   1742 ; AVX:       # BB#0:
   1743 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   1744 ; AVX-NEXT:    retq
   1745   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef> ; lanes 1-3 = %a[4..6]; lane 6 = %b[1]; remaining lanes are undef
   1746   ret <8 x i16> %shuffle
   1747 }
   1748 
   1749 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { ; Rotation across both inputs with %a first: result = %a[5..7] followed by %b[0..4].
   1750 ; SSE2-LABEL: shuffle_v8i16_56789abc:
   1751 ; SSE2:       # BB#0:
   1752 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1753 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
   1754 ; SSE2-NEXT:    por %xmm1, %xmm0
   1755 ; SSE2-NEXT:    retq
   1756 ;
   1757 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
   1758 ; SSSE3:       # BB#0:
   1759 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1760 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1761 ; SSSE3-NEXT:    retq
   1762 ;
   1763 ; SSE41-LABEL: shuffle_v8i16_56789abc:
   1764 ; SSE41:       # BB#0:
   1765 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1766 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1767 ; SSE41-NEXT:    retq
   1768 ;
   1769 ; AVX-LABEL: shuffle_v8i16_56789abc:
   1770 ; AVX:       # BB#0:
   1771 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1772 ; AVX-NEXT:    retq
   1773   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> ; indices 8-12 = %b[0..4]; in the non-AVX palignr checks the result is produced in xmm1 and then copied to xmm0 with movdqa
   1774   ret <8 x i16> %shuffle
   1775 }
   1776 
   1777 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { ; Partially-undef two-input mask that is compatible with the rotation %a[5..7],%b[0..4].
   1778 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
   1779 ; SSE2:       # BB#0:
   1780 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1781 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
   1782 ; SSE2-NEXT:    por %xmm1, %xmm0
   1783 ; SSE2-NEXT:    retq
   1784 ;
   1785 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
   1786 ; SSSE3:       # BB#0:
   1787 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1788 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1789 ; SSSE3-NEXT:    retq
   1790 ;
   1791 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
   1792 ; SSE41:       # BB#0:
   1793 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1794 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
   1795 ; SSE41-NEXT:    retq
   1796 ;
   1797 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
   1798 ; AVX:       # BB#0:
   1799 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   1800 ; AVX-NEXT:    retq
   1801   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> ; lane 1 = %a[6]; lanes 4-6 = %b[1..3]; remaining lanes are undef
   1802   ret <8 x i16> %shuffle
   1803 }
   1804 
   1805 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { ; Spreads %a[0] and %a[1] to lanes 0 and 4 with every other lane undef; the SSE4.1 and AVX checks expect a single pmovzxwq/vpmovzxwq, the older runs a pshufd + pshufhw pair.
   1806 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
   1807 ; SSE2:       # BB#0:
   1808 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   1809 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
   1810 ; SSE2-NEXT:    retq
   1811 ;
   1812 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
   1813 ; SSSE3:       # BB#0:
   1814 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   1815 ; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
   1816 ; SSSE3-NEXT:    retq
   1817 ;
   1818 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
   1819 ; SSE41:       # BB#0:
   1820 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1821 ; SSE41-NEXT:    retq
   1822 ;
   1823 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
   1824 ; AVX:       # BB#0:
   1825 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1826 ; AVX-NEXT:    retq
   1827   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 = %a[0]; lane 4 = %a[1]; the undef lanes may legally be filled with zeros
   1828   ret <8 x i16> %shuffle
   1829 }
   1830 
   1831 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) { ; Zero-extends %a[0] and %a[1] to 64 bits each; the SSE4.1 and AVX checks expect pmovzxwq/vpmovzxwq, the older runs two unpacks against a zeroed register.
   1832 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
   1833 ; SSE2:       # BB#0:
   1834 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1835 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1836 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1837 ; SSE2-NEXT:    retq
   1838 ;
   1839 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
   1840 ; SSSE3:       # BB#0:
   1841 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1842 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1843 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1844 ; SSSE3-NEXT:    retq
   1845 ;
   1846 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
   1847 ; SSE41:       # BB#0:
   1848 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1849 ; SSE41-NEXT:    retq
   1850 ;
   1851 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
   1852 ; AVX:       # BB#0:
   1853 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1854 ; AVX-NEXT:    retq
   1855   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> ; lane 0 = %a[0]; lane 4 = %a[1]; indices 9-15 read the zero vector
   1856   ret <8 x i16> %shuffle
   1857 }
   1858 
   1859 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { ; Spreads %a[0..3] to the even lanes with the odd lanes undef; the SSE4.1 and AVX checks expect pmovzxwd/vpmovzxwd, the older runs a punpcklwd of xmm0 with itself.
   1860 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
   1861 ; SSE2:       # BB#0:
   1862 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   1863 ; SSE2-NEXT:    retq
   1864 ;
   1865 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
   1866 ; SSSE3:       # BB#0:
   1867 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   1868 ; SSSE3-NEXT:    retq
   1869 ;
   1870 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
   1871 ; SSE41:       # BB#0:
   1872 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1873 ; SSE41-NEXT:    retq
   1874 ;
   1875 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
   1876 ; AVX:       # BB#0:
   1877 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1878 ; AVX-NEXT:    retq
   1879   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef> ; lanes 0, 2, 4, 6 = %a[0..3]; the undef odd lanes may hold either a duplicate or zero
   1880   ret <8 x i16> %shuffle
   1881 }
   1882 
   1883 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { ; Zero-extends %a[0..3] to 32 bits each; the SSE4.1 and AVX checks expect pmovzxwd/vpmovzxwd, the older runs a punpcklwd against a zeroed register.
   1884 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
   1885 ; SSE2:       # BB#0:
   1886 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1887 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1888 ; SSE2-NEXT:    retq
   1889 ;
   1890 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
   1891 ; SSSE3:       # BB#0:
   1892 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1893 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1894 ; SSSE3-NEXT:    retq
   1895 ;
   1896 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
   1897 ; SSE41:       # BB#0:
   1898 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1899 ; SSE41-NEXT:    retq
   1900 ;
   1901 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
   1902 ; AVX:       # BB#0:
   1903 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1904 ; AVX-NEXT:    retq
   1905   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> ; even lanes = %a[0..3]; odd indices 9, 11, 13, 15 read the zero vector
   1906   ret <8 x i16> %shuffle
   1907 }
   1908 
   1909 ;
   1910 ; Shuffle to logical bit shifts
   1911 ;
   1912 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) { ; Each 32-bit group becomes <zero, low i16>, which the checks expect to be matched as a 32-bit left shift by 16 (pslld/vpslld).
   1913 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
   1914 ; SSE:       # BB#0:
   1915 ; SSE-NEXT:    pslld $16, %xmm0
   1916 ; SSE-NEXT:    retq
   1917 ;
   1918 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
   1919 ; AVX:       # BB#0:
   1920 ; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
   1921 ; AVX-NEXT:    retq
   1922   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6> ; index 8 reads lane 0 of the zero vector; odd lanes = %a[0], %a[2], %a[4], %a[6]
   1923   ret <8 x i16> %shuffle
   1924 }
   1925 
   1926 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) { ; Each 64-bit half becomes <zero, zero, zero, low i16>, which the checks expect to be matched as a 64-bit left shift by 48 (psllq/vpsllq).
   1927 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
   1928 ; SSE:       # BB#0:
   1929 ; SSE-NEXT:    psllq $48, %xmm0
   1930 ; SSE-NEXT:    retq
   1931 ;
   1932 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
   1933 ; AVX:       # BB#0:
   1934 ; AVX-NEXT:    vpsllq $48, %xmm0, %xmm0
   1935 ; AVX-NEXT:    retq
   1936   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4> ; index 8 reads lane 0 of the zero vector; lane 3 = %a[0]; lane 7 = %a[4]
   1937   ret <8 x i16> %shuffle
   1938 }
   1939 
   1940 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) { ; Mask with undef lanes that is compatible with a 64-bit left shift by 32; the checks expect a single psllq/vpsllq.
   1941 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
   1942 ; SSE:       # BB#0:
   1943 ; SSE-NEXT:    psllq $32, %xmm0
   1944 ; SSE-NEXT:    retq
   1945 ;
   1946 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
   1947 ; AVX:       # BB#0:
   1948 ; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
   1949 ; AVX-NEXT:    retq
   1950   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef> ; lanes 0, 1, 4 are zero; lanes 2-3 = %a[0..1]; lane 6 = %a[4]; lanes 5 and 7 are undef
   1951   ret <8 x i16> %shuffle
   1952 }
   1953 
   1954 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) { ; Mask with an undef lane that is compatible with a 64-bit left shift by 16; the checks expect a single psllq/vpsllq.
   1955 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
   1956 ; SSE:       # BB#0:
   1957 ; SSE-NEXT:    psllq $16, %xmm0
   1958 ; SSE-NEXT:    retq
   1959 ;
   1960 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
   1961 ; AVX:       # BB#0:
   1962 ; AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
   1963 ; AVX-NEXT:    retq
   1964   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6> ; lanes 0 and 4 are zero; lane 1 = %a[0]; lane 3 = %a[2]; lanes 5-7 = %a[4..6]; lane 2 is undef
   1965   ret <8 x i16> %shuffle
   1966 }
   1967 
   1968 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) { ; Mask with an undef lane that is compatible with a 32-bit logical right shift by 16; the checks expect a single psrld/vpsrld.
   1969 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
   1970 ; SSE:       # BB#0:
   1971 ; SSE-NEXT:    psrld $16, %xmm0
   1972 ; SSE-NEXT:    retq
   1973 ;
   1974 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
   1975 ; AVX:       # BB#0:
   1976 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
   1977 ; AVX-NEXT:    retq
   1978   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8> ; lanes 0, 2, 6 = %a[1], %a[3], %a[7]; odd lanes are zero; lane 4 is undef
   1979   ret <8 x i16> %shuffle
   1980 }
   1981 
   1982 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) { ; Mask with an undef lane that is compatible with a 64-bit logical right shift by 16; the checks expect a single psrlq/vpsrlq.
   1983 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
   1984 ; SSE:       # BB#0:
   1985 ; SSE-NEXT:    psrlq $16, %xmm0
   1986 ; SSE-NEXT:    retq
   1987 ;
   1988 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
   1989 ; AVX:       # BB#0:
   1990 ; AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
   1991 ; AVX-NEXT:    retq
   1992   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8> ; lane 0 = %a[1]; lane 2 = %a[3]; lanes 4-6 = %a[5..7]; lanes 3 and 7 are zero; lane 1 is undef
   1993   ret <8 x i16> %shuffle
   1994 }
   1995 
   1996 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) { ; Each 64-bit half keeps its upper two i16 lanes in the low positions and zeros the rest, which the checks expect to be matched as a 64-bit logical right shift by 32 (psrlq/vpsrlq).
   1997 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
   1998 ; SSE:       # BB#0:
   1999 ; SSE-NEXT:    psrlq $32, %xmm0
   2000 ; SSE-NEXT:    retq
   2001 ;
   2002 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
   2003 ; AVX:       # BB#0:
   2004 ; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
   2005 ; AVX-NEXT:    retq
   2006   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8> ; lanes 0-1 = %a[2..3]; lanes 4-5 = %a[6..7]; index 8 reads lane 0 of the zero vector
   2007   ret <8 x i16> %shuffle
   2008 }
   2009 
   2010 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) { ; Mask with undef lanes that is compatible with a 64-bit logical right shift by 48; the checks expect a single psrlq/vpsrlq.
   2011 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
   2012 ; SSE:       # BB#0:
   2013 ; SSE-NEXT:    psrlq $48, %xmm0
   2014 ; SSE-NEXT:    retq
   2015 ;
   2016 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
   2017 ; AVX:       # BB#0:
   2018 ; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
   2019 ; AVX-NEXT:    retq
   2020   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8> ; lane 0 = %a[3]; lanes 1, 5, 6, 7 are zero; lanes 2-4 are undef
   2021   ret <8 x i16> %shuffle
   2022 }
   2023 
   2024 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
        ; The low four lanes keep lanes 0, 1 and 3 of %a in place (lane 2 undef);
        ; the high four lanes read the zero vector or are undef. The assertions
        ; expect a move that keeps the low 64 bits and zeroes the upper 64 bits.
   2025 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
   2026 ; SSE:       # BB#0:
   2027 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
   2028 ; SSE-NEXT:    retq
   2029 ;
   2030 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
   2031 ; AVX:       # BB#0:
   2032 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
   2033 ; AVX-NEXT:    retq
   2034   %res = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
   2035   ret <8 x i16> %res
   2036 }
   2037 
   2038 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
        ; Every lane of %a stays in place except lane 1, which reads the zero
        ; vector (mask index 9). The assertions expect an AND with a constant-pool
        ; mask on the SSE2 and SSSE3 runs, and a zeroed register blended in with
        ; pblendw / vpblendw on the SSE4.1 and AVX runs.
   2039 ; SSE2-LABEL: shuffle_v8i16_0z234567:
   2040 ; SSE2:       # BB#0:
   2041 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   2042 ; SSE2-NEXT:    retq
   2043 ;
   2044 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
   2045 ; SSSE3:       # BB#0:
   2046 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   2047 ; SSSE3-NEXT:    retq
   2048 ;
   2049 ; SSE41-LABEL: shuffle_v8i16_0z234567:
   2050 ; SSE41:       # BB#0:
   2051 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   2052 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   2053 ; SSE41-NEXT:    retq
   2054 ;
   2055 ; AVX-LABEL: shuffle_v8i16_0z234567:
   2056 ; AVX:       # BB#0:
   2057 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   2058 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   2059 ; AVX-NEXT:    retq
   2060   %res = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2061   ret <8 x i16> %res
   2062 }
   2063 
   2064 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
        ; Lanes 0, 5 and 7 of %a stay in place; lanes 1-4 and 6 read the zero
        ; vector. The assertions expect an AND with a constant-pool mask on the
        ; SSE2 and SSSE3 runs, and a blend against a zeroed register on the SSE4.1
        ; and AVX runs.
   2065 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
   2066 ; SSE2:       # BB#0:
   2067 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   2068 ; SSE2-NEXT:    retq
   2069 ;
   2070 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
   2071 ; SSSE3:       # BB#0:
   2072 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   2073 ; SSSE3-NEXT:    retq
   2074 ;
   2075 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
   2076 ; SSE41:       # BB#0:
   2077 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   2078 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
   2079 ; SSE41-NEXT:    retq
   2080 ;
   2081 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
   2082 ; AVX:       # BB#0:
   2083 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   2084 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
   2085 ; AVX-NEXT:    retq
   2086   %res = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
   2087   ret <8 x i16> %res
   2088 }
   2089 
   2090 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
        ; Lanes 0-6 of %a stay in place and only lane 7 reads the zero vector
        ; (mask index 15), matching the "0123456z" in the test name. The runs
        ; without SSE4.1 are expected to AND with a constant-pool mask; the
        ; SSE4.1 and AVX runs are expected to blend lane 7 from a zeroed register.
        ; NOTE(review): the pblendw / vpblendw operand lists below were adjusted
        ; by hand to follow the mask; regenerate with
        ; utils/update_llc_test_checks.py to confirm them.
   2091 ; SSE2-LABEL: shuffle_v8i16_0123456z:
   2092 ; SSE2:       # BB#0:
   2093 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
   2094 ; SSE2-NEXT:    retq
   2095 ;
   2096 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
   2097 ; SSSE3:       # BB#0:
   2098 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
   2099 ; SSSE3-NEXT:    retq
   2100 ;
   2101 ; SSE41-LABEL: shuffle_v8i16_0123456z:
   2102 ; SSE41:       # BB#0:
   2103 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   2104 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2105 ; SSE41-NEXT:    retq
   2106 ;
   2107 ; AVX-LABEL: shuffle_v8i16_0123456z:
   2108 ; AVX:       # BB#0:
   2109 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   2110 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2111 ; AVX-NEXT:    retq
   2112   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   2113   ret <8 x i16> %shuffle
   2114 }
   2115 
   2116 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
        ; Two-input shuffle with undef lanes: lane 0 reads lane 7 of %b (index 15),
        ; lane 2 reads lane 3 of %a, lanes 4 and 5 both read lane 4 of %b (index
        ; 12), lane 6 reads lane 5 of %a; lanes 1, 3 and 7 are undef. The
        ; assertions expect %a shifted left by two bytes, a high-word permute of
        ; %b, and then an interleave of the two high dword pairs.
   2117 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
   2118 ; SSE:       # BB#0:
   2119 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2120 ; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   2121 ; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   2122 ; SSE-NEXT:    movdqa %xmm1, %xmm0
   2123 ; SSE-NEXT:    retq
   2124 ;
   2125 ; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
   2126 ; AVX:       # BB#0:
   2127 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2128 ; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   2129 ; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   2130 ; AVX-NEXT:    retq
   2131   %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
   2132   ret <8 x i16> %res
   2133 }
   2134 
   2135 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
        ; Lane 0 reads the zero vector (mask index 8), lanes 1-6 take lanes 0-5 of
        ; %a and lane 7 is undef. The assertions expect one whole-register byte
        ; shift to the left by two bytes, which shifts zeroes into lane 0.
   2136 ; SSE-LABEL: shuffle_v8i16_8012345u:
   2137 ; SSE:       # BB#0:
   2138 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2139 ; SSE-NEXT:    retq
   2140 ;
   2141 ; AVX-LABEL: shuffle_v8i16_8012345u:
   2142 ; AVX:       # BB#0:
   2143 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2144 ; AVX-NEXT:    retq
   2145   %res = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
   2146 
   2147   ret <8 x i16> %res
   2148 }
   2149 
   2150 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
        ; Loads an i32, inserts it into lane 0 of a zeroed <4 x i32>, reinterprets
        ; the vector as <8 x i16> and splats i16 lane 0. The AVX2 run is expected
        ; to fold the load into a word broadcast straight from (%rdi); the SSSE3,
        ; SSE4.1 and AVX1 runs expect a movd load plus a byte shuffle; the plain
        ; SSE2 run expects a movd load followed by dword/low-word/high-word
        ; shuffles.
   2151 ; SSE2-LABEL: insert_dup_mem_v8i16_i32:
   2152 ; SSE2:       # BB#0:
   2153 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2154 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   2155 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   2156 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   2157 ; SSE2-NEXT:    retq
   2158 ;
   2159 ; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
   2160 ; SSSE3:       # BB#0:
   2161 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2162 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   2163 ; SSSE3-NEXT:    retq
   2164 ;
   2165 ; SSE41-LABEL: insert_dup_mem_v8i16_i32:
   2166 ; SSE41:       # BB#0:
   2167 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2168 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   2169 ; SSE41-NEXT:    retq
   2170 ;
   2171 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
   2172 ; AVX1:       # BB#0:
   2173 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2174 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   2175 ; AVX1-NEXT:    retq
   2176 ;
   2177 ; AVX2-LABEL: insert_dup_mem_v8i16_i32:
   2178 ; AVX2:       # BB#0:
   2179 ; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
   2180 ; AVX2-NEXT:    retq
   2181   %val = load i32, i32* %ptr, align 4
   2182   %vec = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
   2183   %as_i16 = bitcast <4 x i32> %vec to <8 x i16>
   2184   %splat = shufflevector <8 x i16> %as_i16, <8 x i16> undef, <8 x i32> zeroinitializer
   2185   ret <8 x i16> %splat
   2186 }
   2187 
   2188 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
        ; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zeroed
        ; <4 x i32>, reinterprets the vector as <8 x i16> and splats i16 lane 0.
        ; Every run is expected to sign-extend through %eax and move that into
        ; xmm0; the splat itself is a register word broadcast with AVX2, a byte
        ; shuffle with SSSE3 through AVX1, and a three-shuffle sequence on plain
        ; SSE2.
   2189 ; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
   2190 ; SSE2:       # BB#0:
   2191 ; SSE2-NEXT:    movswl (%rdi), %eax
   2192 ; SSE2-NEXT:    movd %eax, %xmm0
   2193 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   2194 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   2195 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   2196 ; SSE2-NEXT:    retq
   2197 ;
   2198 ; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
   2199 ; SSSE3:       # BB#0:
   2200 ; SSSE3-NEXT:    movswl (%rdi), %eax
   2201 ; SSSE3-NEXT:    movd %eax, %xmm0
   2202 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   2203 ; SSSE3-NEXT:    retq
   2204 ;
   2205 ; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
   2206 ; SSE41:       # BB#0:
   2207 ; SSE41-NEXT:    movswl (%rdi), %eax
   2208 ; SSE41-NEXT:    movd %eax, %xmm0
   2209 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   2210 ; SSE41-NEXT:    retq
   2211 ;
   2212 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
   2213 ; AVX1:       # BB#0:
   2214 ; AVX1-NEXT:    movswl (%rdi), %eax
   2215 ; AVX1-NEXT:    vmovd %eax, %xmm0
   2216 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   2217 ; AVX1-NEXT:    retq
   2218 ;
   2219 ; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
   2220 ; AVX2:       # BB#0:
   2221 ; AVX2-NEXT:    movswl (%rdi), %eax
   2222 ; AVX2-NEXT:    vmovd %eax, %xmm0
   2223 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
   2224 ; AVX2-NEXT:    retq
   2225   %half = load i16, i16* %ptr, align 2
   2226   %wide = sext i16 %half to i32
   2227   %vec = insertelement <4 x i32> zeroinitializer, i32 %wide, i32 0
   2228   %as_i16 = bitcast <4 x i32> %vec to <8 x i16>
   2229   %splat = shufflevector <8 x i16> %as_i16, <8 x i16> undef, <8 x i32> zeroinitializer
   2230   ret <8 x i16> %splat
   2231 }
   2232 
   2233 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
        ; Loads an i32, inserts it into lane 0 of a zeroed <4 x i32>, reinterprets
        ; the vector as <8 x i16> and splats i16 lane 1 (the second half of the
        ; loaded dword). The AVX2 run is expected to broadcast the word at byte
        ; offset 2 from %rdi directly; the other runs load with movd and then
        ; replicate bytes 2-3 (pshufb) or word 1 (SSE2 shuffle sequence).
   2234 ; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2235 ; SSE2:       # BB#0:
   2236 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2237 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   2238 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
   2239 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
   2240 ; SSE2-NEXT:    retq
   2241 ;
   2242 ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2243 ; SSSE3:       # BB#0:
   2244 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2245 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2246 ; SSSE3-NEXT:    retq
   2247 ;
   2248 ; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2249 ; SSE41:       # BB#0:
   2250 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2251 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2252 ; SSE41-NEXT:    retq
   2253 ;
   2254 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2255 ; AVX1:       # BB#0:
   2256 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2257 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2258 ; AVX1-NEXT:    retq
   2259 ;
   2260 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
   2261 ; AVX2:       # BB#0:
   2262 ; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0
   2263 ; AVX2-NEXT:    retq
   2264   %val = load i32, i32* %ptr, align 4
   2265   %vec = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
   2266   %as_i16 = bitcast <4 x i32> %vec to <8 x i16>
   2267   %splat = shufflevector <8 x i16> %as_i16, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   2268   ret <8 x i16> %splat
   2269 }
   2270 
   2271 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
        ; Loads an i32, inserts it into lane 1 of a zeroed <4 x i32>, reinterprets
        ; the vector as <8 x i16> and splats i16 lane 3 (the second half of dword
        ; lane 1, i.e. of the loaded value). The AVX2 run is expected to broadcast
        ; the word at byte offset 2 from %rdi directly; the other runs load the
        ; value into the low dword with movd and replicate its second word from
        ; there.
   2272 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2273 ; SSE2:       # BB#0:
   2274 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2275 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
   2276 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
   2277 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
   2278 ; SSE2-NEXT:    retq
   2279 ;
   2280 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2281 ; SSSE3:       # BB#0:
   2282 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2283 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2284 ; SSSE3-NEXT:    retq
   2285 ;
   2286 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2287 ; SSE41:       # BB#0:
   2288 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2289 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2290 ; SSE41-NEXT:    retq
   2291 ;
   2292 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2293 ; AVX1:       # BB#0:
   2294 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2295 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2296 ; AVX1-NEXT:    retq
   2297 ;
   2298 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
   2299 ; AVX2:       # BB#0:
   2300 ; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0
   2301 ; AVX2-NEXT:    retq
   2302   %val = load i32, i32* %ptr, align 4
   2303   %vec = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
   2304   %as_i16 = bitcast <4 x i32> %vec to <8 x i16>
   2305   %splat = shufflevector <8 x i16> %as_i16, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   2306   ret <8 x i16> %splat
   2307 }
   2308 
   2309 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
        ; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zeroed
        ; <4 x i32>, reinterprets the vector as <8 x i16> and splats i16 lane 1,
        ; i.e. the upper half of the sign-extended value. Every run is expected to
        ; sign-extend through %eax. The AVX2 run then shifts %eax right by 16
        ; before a register word broadcast, while the other runs move %eax into
        ; xmm0 unshifted and replicate word 1 with vector shuffles.
   2310 ; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2311 ; SSE2:       # BB#0:
   2312 ; SSE2-NEXT:    movswl (%rdi), %eax
   2313 ; SSE2-NEXT:    movd %eax, %xmm0
   2314 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   2315 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
   2316 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
   2317 ; SSE2-NEXT:    retq
   2318 ;
   2319 ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2320 ; SSSE3:       # BB#0:
   2321 ; SSSE3-NEXT:    movswl (%rdi), %eax
   2322 ; SSSE3-NEXT:    movd %eax, %xmm0
   2323 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2324 ; SSSE3-NEXT:    retq
   2325 ;
   2326 ; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2327 ; SSE41:       # BB#0:
   2328 ; SSE41-NEXT:    movswl (%rdi), %eax
   2329 ; SSE41-NEXT:    movd %eax, %xmm0
   2330 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2331 ; SSE41-NEXT:    retq
   2332 ;
   2333 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2334 ; AVX1:       # BB#0:
   2335 ; AVX1-NEXT:    movswl (%rdi), %eax
   2336 ; AVX1-NEXT:    vmovd %eax, %xmm0
   2337 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2338 ; AVX1-NEXT:    retq
   2339 ;
   2340 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
   2341 ; AVX2:       # BB#0:
   2342 ; AVX2-NEXT:    movswl (%rdi), %eax
   2343 ; AVX2-NEXT:    shrl $16, %eax
   2344 ; AVX2-NEXT:    vmovd %eax, %xmm0
   2345 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
   2346 ; AVX2-NEXT:    retq
   2347   %half = load i16, i16* %ptr, align 2
   2348   %wide = sext i16 %half to i32
   2349   %vec = insertelement <4 x i32> zeroinitializer, i32 %wide, i32 0
   2350   %as_i16 = bitcast <4 x i32> %vec to <8 x i16>
   2351   %splat = shufflevector <8 x i16> %as_i16, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   2352   ret <8 x i16> %splat
   2353 }
   2354 
   2355 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
        ; Loads an i16, sign-extends it to i32, inserts it into lane 1 of a zeroed
        ; <4 x i32>, reinterprets the vector as <8 x i16> and splats i16 lane 3,
        ; i.e. the upper half of the sign-extended value held in dword lane 1.
        ; Every run is expected to sign-extend through %eax. The AVX2 run then
        ; shifts %eax right by 16 before a register word broadcast, while the
        ; other runs move %eax into the low dword of xmm0 and replicate its
        ; second word with vector shuffles.
   2356 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2357 ; SSE2:       # BB#0:
   2358 ; SSE2-NEXT:    movswl (%rdi), %eax
   2359 ; SSE2-NEXT:    movd %eax, %xmm0
   2360 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
   2361 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
   2362 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
   2363 ; SSE2-NEXT:    retq
   2364 ;
   2365 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2366 ; SSSE3:       # BB#0:
   2367 ; SSSE3-NEXT:    movswl (%rdi), %eax
   2368 ; SSSE3-NEXT:    movd %eax, %xmm0
   2369 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2370 ; SSSE3-NEXT:    retq
   2371 ;
   2372 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2373 ; SSE41:       # BB#0:
   2374 ; SSE41-NEXT:    movswl (%rdi), %eax
   2375 ; SSE41-NEXT:    movd %eax, %xmm0
   2376 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2377 ; SSE41-NEXT:    retq
   2378 ;
   2379 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2380 ; AVX1:       # BB#0:
   2381 ; AVX1-NEXT:    movswl (%rdi), %eax
   2382 ; AVX1-NEXT:    vmovd %eax, %xmm0
   2383 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   2384 ; AVX1-NEXT:    retq
   2385 ;
   2386 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
   2387 ; AVX2:       # BB#0:
   2388 ; AVX2-NEXT:    movswl (%rdi), %eax
   2389 ; AVX2-NEXT:    shrl $16, %eax
   2390 ; AVX2-NEXT:    vmovd %eax, %xmm0
   2391 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
   2392 ; AVX2-NEXT:    retq
   2393   %half = load i16, i16* %ptr, align 2
   2394   %wide = sext i16 %half to i32
   2395   %vec = insertelement <4 x i32> zeroinitializer, i32 %wide, i32 1
   2396   %as_i16 = bitcast <4 x i32> %vec to <8 x i16>
   2397   %splat = shufflevector <8 x i16> %as_i16, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   2398   ret <8 x i16> %splat
   2399 }
   2400