; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

target triple = "x86_64-unknown-unknown"

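; Each test below lowers a <16 x i16> shufflevector whose mask is encoded in the
; function name and verifies the expected AVX1 and AVX2 code generation.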
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,3,3,6,6,7,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,0]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,0,65535,0,65535,0,65535,0,65535]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [0,65535,0,65535,0,65535,0,65535,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,1,1,4,4,5,5]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

    929 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    930 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
    931 ; AVX1:       # BB#0:
    932 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    933 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    934 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    935 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    936 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    937 ; AVX1-NEXT:    retq
    938 ;
    939 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
    940 ; AVX2:       # BB#0:
    941 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    942 ; AVX2-NEXT:    retq
    943   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    944   ret <16 x i16> %shuffle
    945 }
    946 
    947 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
    948 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
    949 ; AVX1:       # BB#0:
    950 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    951 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    952 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
    953 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    954 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    955 ; AVX1-NEXT:    retq
    956 ;
    957 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
    958 ; AVX2:       # BB#0:
    959 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
    960 ; AVX2-NEXT:    retq
    961   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
    962   ret <16 x i16> %shuffle
    963 }
    964 
    965 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
    966 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
    967 ; AVX1:       # BB#0:
    968 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    969 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    970 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    971 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    972 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    973 ; AVX1-NEXT:    retq
    974 ;
    975 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
    976 ; AVX2:       # BB#0:
    977 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
    978 ; AVX2-NEXT:    retq
    979   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    980   ret <16 x i16> %shuffle
    981 }
    982 
    983 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
    984 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
    985 ; AVX1:       # BB#0:
    986 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    987 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    988 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    989 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    990 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    991 ; AVX1-NEXT:    retq
    992 ;
    993 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
    994 ; AVX2:       # BB#0:
    995 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31]
    996 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
    997 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    998 ; AVX2-NEXT:    retq
    999   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   1000   ret <16 x i16> %shuffle
   1001 }
   1002 
   1003 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
   1004 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
   1005 ; AVX1:       # BB#0:
   1006 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1007 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1008 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   1009 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1010 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1011 ; AVX1-NEXT:    retq
   1012 ;
   1013 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
   1014 ; AVX2:       # BB#0:
   1015 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
   1016 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
   1017 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   1018 ; AVX2-NEXT:    retq
   1019   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   1020   ret <16 x i16> %shuffle
   1021 }
   1022 
   1023 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1024 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1025 ; AVX1:       # BB#0:
   1026 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
   1027 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1028 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
   1029 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1030 ; AVX1-NEXT:    retq
   1031 ;
   1032 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1033 ; AVX2:       # BB#0:
   1034 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
   1035 ; AVX2-NEXT:    retq
   1036   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1037   ret <16 x i16> %shuffle
   1038 }
   1039 
   1040 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1041 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1042 ; AVX1:       # BB#0:
   1043 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
   1044 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1045 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
   1046 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1047 ; AVX1-NEXT:    retq
   1048 ;
   1049 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1050 ; AVX2:       # BB#0:
   1051 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
   1052 ; AVX2-NEXT:    retq
   1053   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
   1054   ret <16 x i16> %shuffle
   1055 }
   1056 
   1057 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1058 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1059 ; AVX1:       # BB#0:
   1060 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
   1061 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1062 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
   1063 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1064 ; AVX1-NEXT:    retq
   1065 ;
   1066 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1067 ; AVX2:       # BB#0:
   1068 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
   1069 ; AVX2-NEXT:    retq
   1070   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
   1071   ret <16 x i16> %shuffle
   1072 }
   1073 
   1074 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1075 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1076 ; AVX1:       # BB#0:
   1077 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
   1078 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1079 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
   1080 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1081 ; AVX1-NEXT:    retq
   1082 ;
   1083 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1084 ; AVX2:       # BB#0:
   1085 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
   1086 ; AVX2-NEXT:    retq
   1087   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
   1088   ret <16 x i16> %shuffle
   1089 }
   1090 
   1091 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
   1092 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1093 ; AVX1:       # BB#0:
   1094 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
   1095 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1096 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
   1097 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1098 ; AVX1-NEXT:    retq
   1099 ;
   1100 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1101 ; AVX2:       # BB#0:
   1102 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
   1103 ; AVX2-NEXT:    retq
   1104   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
   1105   ret <16 x i16> %shuffle
   1106 }
   1107 
   1108 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
   1109 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1110 ; AVX1:       # BB#0:
   1111 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
   1112 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1113 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
   1114 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1115 ; AVX1-NEXT:    retq
   1116 ;
   1117 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1118 ; AVX2:       # BB#0:
   1119 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
   1120 ; AVX2-NEXT:    retq
   1121   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
   1122   ret <16 x i16> %shuffle
   1123 }
   1124 
   1125 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
   1126 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1127 ; AVX1:       # BB#0:
   1128 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1129 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1130 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
   1131 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1132 ; AVX1-NEXT:    retq
   1133 ;
   1134 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1135 ; AVX2:       # BB#0:
   1136 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
   1137 ; AVX2-NEXT:    retq
   1138   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
   1139   ret <16 x i16> %shuffle
   1140 }
   1141 
   1142 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
   1143 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1144 ; AVX1:       # BB#0:
   1145 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
   1146 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1147 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1148 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
   1149 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1150 ; AVX1-NEXT:    retq
   1151 ;
   1152 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1153 ; AVX2:       # BB#0:
   1154 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
   1155 ; AVX2-NEXT:    retq
   1156   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
   1157   ret <16 x i16> %shuffle
   1158 }
   1159 
   1160 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1161 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1162 ; AVX1:       # BB#0:
   1163 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1164 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1165 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1166 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1167 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1168 ; AVX1-NEXT:    retq
   1169 ;
   1170 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1171 ; AVX2:       # BB#0:
   1172 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1173 ; AVX2-NEXT:    retq
   1174   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   1175   ret <16 x i16> %shuffle
   1176 }
   1177 
   1178 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
   1179 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1180 ; AVX1:       # BB#0:
   1181 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
   1182 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1183 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1184 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
   1185 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1186 ; AVX1-NEXT:    retq
   1187 ;
   1188 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1189 ; AVX2:       # BB#0:
   1190 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
   1191 ; AVX2-NEXT:    retq
   1192   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
   1193   ret <16 x i16> %shuffle
   1194 }
   1195 
   1196 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
   1197 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1198 ; AVX1:       # BB#0:
   1199 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
   1200 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1201 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
   1202 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1203 ; AVX1-NEXT:    retq
   1204 ;
   1205 ; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1206 ; AVX2:       # BB#0:
   1207 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
   1208 ; AVX2-NEXT:    retq
   1209   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
   1210   ret <16 x i16> %shuffle
   1211 }
   1212 
   1213 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
   1214 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1215 ; AVX1:       # BB#0:
   1216 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
   1217 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1218 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1219 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
   1220 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1221 ; AVX1-NEXT:    retq
   1222 ;
   1223 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1224 ; AVX2:       # BB#0:
   1225 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
   1226 ; AVX2-NEXT:    retq
   1227   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
   1228   ret <16 x i16> %shuffle
   1229 }
   1230 
   1231 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1232 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1233 ; AVX1:       # BB#0:
   1234 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3]
   1235 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1236 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1237 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
   1238 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1239 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1240 ; AVX1-NEXT:    retq
   1241 ;
   1242 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1243 ; AVX2:       # BB#0:
   1244 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
   1245 ; AVX2-NEXT:    retq
   1246   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
   1247   ret <16 x i16> %shuffle
   1248 }
   1249 
   1250 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
   1251 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1252 ; AVX1:       # BB#0:
   1253 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1254 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1255 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1256 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1257 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1258 ; AVX1-NEXT:    retq
   1259 ;
   1260 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1261 ; AVX2:       # BB#0:
   1262 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1263 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1264 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1265 ; AVX2-NEXT:    retq
   1266   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1267   ret <16 x i16> %shuffle
   1268 }
   1269 
   1270 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
   1271 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1272 ; AVX1:       # BB#0:
   1273 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1274 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1275 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1276 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1277 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1278 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1279 ; AVX1-NEXT:    retq
   1280 ;
   1281 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1282 ; AVX2:       # BB#0:
   1283 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
   1284 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1285 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1286 ; AVX2-NEXT:    retq
   1287   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1288   ret <16 x i16> %shuffle
   1289 }
   1290 
   1291 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
   1292 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1293 ; AVX1:       # BB#0:
   1294 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1295 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1296 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1297 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1298 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1299 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1300 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1301 ; AVX1-NEXT:    retq
   1302 ;
   1303 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1304 ; AVX2:       # BB#0:
   1305 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1306 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1307 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1308 ; AVX2-NEXT:    retq
   1309   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1310   ret <16 x i16> %shuffle
   1311 }
   1312 
   1313 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
   1314 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1315 ; AVX1:       # BB#0:
   1316 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1317 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1318 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1319 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1320 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1321 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1322 ; AVX1-NEXT:    retq
   1323 ;
   1324 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1325 ; AVX2:       # BB#0:
   1326 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1327 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1328 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1329 ; AVX2-NEXT:    retq
   1330   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1331   ret <16 x i16> %shuffle
   1332 }
   1333 
   1334 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
   1335 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1336 ; AVX1:       # BB#0:
   1337 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1338 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1339 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1340 ; AVX1-NEXT:    retq
   1341 ;
   1342 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1343 ; AVX2:       # BB#0:
   1344 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1345 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1346 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1347 ; AVX2-NEXT:    retq
   1348   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1349   ret <16 x i16> %shuffle
   1350 }
   1351 
   1352 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
   1353 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1354 ; AVX1:       # BB#0:
   1355 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1356 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1357 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1358 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1359 ; AVX1-NEXT:    retq
   1360 ;
   1361 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1362 ; AVX2:       # BB#0:
   1363 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
   1364 ; AVX2-NEXT:    retq
   1365   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
   1366   ret <16 x i16> %shuffle
   1367 }
   1368 
   1369 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
   1370 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1371 ; AVX1:       # BB#0:
   1372 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1373 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1374 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1375 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1376 ; AVX1-NEXT:    retq
   1377 ;
   1378 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1379 ; AVX2:       # BB#0:
   1380 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
   1381 ; AVX2-NEXT:    retq
   1382   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
   1383   ret <16 x i16> %shuffle
   1384 }
   1385 
   1386 ;
   1387 ; Shuffle to logical bit shifts
   1388 ;
   1389 
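; Illustrative note (hand-written sketch, not generated by
; update_llc_test_checks.py; the function name below is invented for this
; comment only): on this little-endian target, shifting every 32-bit lane left
; by 16 bits zeroes the low word of each lane and moves the old low word into
; the high word. That is exactly the zz_00_zz_02_..._zz_14 shuffle checked in
; @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14 below, so
; both forms should lower to vpslld $16.
define <16 x i16> @illustrative_v16i16_shl_as_shuffle(<16 x i16> %a) {
  ; Reinterpret the 16 words as eight 32-bit lanes (no data movement).
  %dwords = bitcast <16 x i16> %a to <8 x i32>
  ; Shift each lane left by one word; the vacated low words become zero.
  %shifted = shl <8 x i32> %dwords, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ; Viewed as <16 x i16> again: element 2i is zero, element 2i+1 is the
  ; original element 2i.
  %res = bitcast <8 x i32> %shifted to <16 x i16>
  ret <16 x i16> %res
}
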
   1390 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
   1391 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1392 ; AVX1:       # BB#0:
   1393 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm1
   1394 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1395 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm0
   1396 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1397 ; AVX1-NEXT:    retq
   1398 ;
   1399 ; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1400 ; AVX2:       # BB#0:
   1401 ; AVX2-NEXT:    vpslld $16, %ymm0, %ymm0
   1402 ; AVX2-NEXT:    retq
   1403   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
   1404   ret <16 x i16> %shuffle
   1405 }
   1406 
   1407 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
   1408 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1409 ; AVX1:       # BB#0:
   1410 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm1
   1411 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1412 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
   1413 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1414 ; AVX1-NEXT:    retq
   1415 ;
   1416 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1417 ; AVX2:       # BB#0:
   1418 ; AVX2-NEXT:    vpsllq $48, %ymm0, %ymm0
   1419 ; AVX2-NEXT:    retq
   1420   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
   1421   ret <16 x i16> %shuffle
   1422 }
   1423 
   1424 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
   1425 ; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1426 ; AVX1:       # BB#0:
   1427 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
   1428 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1429 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
   1430 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1431 ; AVX1-NEXT:    retq
   1432 ;
   1433 ; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1434 ; AVX2:       # BB#0:
   1435 ; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
   1436 ; AVX2-NEXT:    retq
   1437   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
   1438   ret <16 x i16> %shuffle
   1439 }
   1440 
   1441 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
   1442 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1443 ; AVX1:       # BB#0:
   1444 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
   1445 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   1446 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1447 ; AVX1-NEXT:    retq
   1448 ;
   1449 ; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1450 ; AVX2:       # BB#0:
   1451 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
   1452 ; AVX2-NEXT:    retq
   1453   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
   1454   ret <16 x i16> %shuffle
   1455 }
   1456 
   1457 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
   1458 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1459 ; AVX1:       # BB#0:
   1460 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1461 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1462 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1463 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1464 ; AVX1-NEXT:    retq
   1465 ;
   1466 ; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1467 ; AVX2:       # BB#0:
   1468 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1469 ; AVX2-NEXT:    retq
   1470   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
   1471   ret <16 x i16> %shuffle
   1472 }
   1473 
    1474 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz(<16 x i16> %a) {
    1475 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz:
   1476 ; AVX1:       # BB#0:
   1477 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1478 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1479 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1480 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1481 ; AVX1-NEXT:    retq
   1482 ;
    1483 ; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz:
   1484 ; AVX2:       # BB#0:
   1485 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1486 ; AVX2-NEXT:    retq
   1487   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
   1488   ret <16 x i16> %shuffle
   1489 }
   1490 
   1491 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
   1492 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   1493 ; AVX1:       # BB#0:
   1494 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1495 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1496 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1497 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1498 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1499 ; AVX1-NEXT:    retq
   1500 ;
   1501 ; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   1502 ; AVX2:       # BB#0:
   1503 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1504 ; AVX2-NEXT:    retq
   1505   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
   1506   ret <16 x i16> %shuffle
   1507 }
   1508 
   1509 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
   1510 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   1511 ; AVX1:       # BB#0:
   1512 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1513 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1514 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   1515 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
   1516 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1517 ; AVX1-NEXT:    retq
   1518 ;
   1519 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   1520 ; AVX2:       # BB#0:
   1521 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
   1522 ; AVX2-NEXT:    retq
   1523   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
   1524   ret <16 x i16> %shuffle
   1525 }
   1526 
    1527 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_08(<16 x i16> %a, <16 x i16> %b) {
    1528 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_08:
   1529 ; AVX1:       # BB#0:
   1530 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1531 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1532 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   1533 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
   1534 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1535 ; AVX1-NEXT:    retq
   1536 ;
    1537 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_08:
   1538 ; AVX2:       # BB#0:
   1539 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
   1540 ; AVX2-NEXT:    retq
   1541   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
   1542   ret <16 x i16> %shuffle
   1543 }
   1544 
   1545 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
   1546 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   1547 ; AVX1:       # BB#0:
   1548 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1549 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1550 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1551 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1552 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1553 ; AVX1-NEXT:    retq
   1554 ;
   1555 ; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   1556 ; AVX2:       # BB#0:
   1557 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1558 ; AVX2-NEXT:    retq
   1559   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
   1560   ret <16 x i16> %shuffle
   1561 }
   1562 
   1563 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
   1564 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   1565 ; AVX1:       # BB#0:
   1566 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   1567 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   1568 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1569 ; AVX1-NEXT:    retq
   1570 ;
   1571 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   1572 ; AVX2:       # BB#0:
   1573 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1574 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
   1575 ; AVX2-NEXT:    retq
   1576   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
   1577   ret <16 x i16> %shuffle
   1578 }
   1579 
   1580 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
   1581 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   1582 ; AVX1:       # BB#0:
   1583 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1584 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1585 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1586 ; AVX1-NEXT:    retq
   1587 ;
   1588 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   1589 ; AVX2:       # BB#0:
   1590 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1591 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1592 ; AVX2-NEXT:    retq
   1593   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
   1594   ret <16 x i16> %shuffle
   1595 }
   1596 
   1597 define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
   1598 ; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   1599 ; AVX1:       # BB#0:
   1600 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1601 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1602 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
   1603 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
   1604 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1605 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1606 ; AVX1-NEXT:    retq
   1607 ;
   1608 ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   1609 ; AVX2:       # BB#0:
   1610 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1611 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1612 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
   1613 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
   1614 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1615 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1616 ; AVX2-NEXT:    retq
   1617   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
   1618   ret <16 x i16> %shuffle
   1619 }
   1620 
   1621 define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
   1622 ; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   1623 ; AVX1:       # BB#0:
   1624 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1625 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   1626 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1627 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
   1628 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1629 ; AVX1-NEXT:    retq
   1630 ;
   1631 ; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   1632 ; AVX2:       # BB#0:
   1633 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1634 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   1635 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1636 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
   1637 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1638 ; AVX2-NEXT:    retq
   1639   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
   1640   ret <16 x i16> %shuffle
   1641 }
   1642 
   1643 define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
   1644 ; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   1645 ; AVX1:       # BB#0:
   1646 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1647 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1648 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
   1649 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   1650 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
   1651 ; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
   1652 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1653 ; AVX1-NEXT:    retq
   1654 ;
   1655 ; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   1656 ; AVX2:       # BB#0:
   1657 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1658 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1659 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   1660 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1661 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1662 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1663 ; AVX2-NEXT:    retq
   1664   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
   1665   ret <16 x i16> %shuffle
   1666 }
   1667 
   1668 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1669 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   1670 ; AVX1:       # BB#0:
   1671 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1672 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1673 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
   1674 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1675 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1676 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1677 ; AVX1-NEXT:    retq
   1678 ;
   1679 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   1680 ; AVX2:       # BB#0:
   1681 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1682 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1683 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
   1684 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   1685 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1686 ; AVX2-NEXT:    retq
   1687   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1688   ret <16 x i16> %shuffle
   1689 }
   1690 
   1691 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1692 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   1693 ; AVX1:       # BB#0:
   1694 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1695 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1696 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1697 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
   1698 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1699 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1700 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1701 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1702 ; AVX1-NEXT:    retq
   1703 ;
   1704 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   1705 ; AVX2:       # BB#0:
   1706 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1707 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   1708 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1709 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
   1710 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1711 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1712 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1713 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1714 ; AVX2-NEXT:    retq
   1715   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   1716   ret <16 x i16> %shuffle
   1717 }
   1718 
   1719 define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
   1720 ; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   1721 ; AVX1:       # BB#0:
   1722 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1723 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1724 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1725 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1726 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1727 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1728 ; AVX1-NEXT:    retq
   1729 ;
   1730 ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   1731 ; AVX2:       # BB#0:
   1732 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1733 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1734 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1735 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1736 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1737 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1738 ; AVX2-NEXT:    retq
   1739   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
   1740   ret <16 x i16> %shuffle
   1741 }
   1742 
   1743 define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
   1744 ; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   1745 ; AVX1:       # BB#0:
   1746 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1747 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1748 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1749 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1750 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1751 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1752 ; AVX1-NEXT:    retq
   1753 ;
   1754 ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   1755 ; AVX2:       # BB#0:
   1756 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1757 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1758 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1759 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1760 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1761 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1762 ; AVX2-NEXT:    retq
   1763   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
   1764   ret <16 x i16> %shuffle
   1765 }
   1766 
   1767 define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   1768 ; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   1769 ; AVX1:       # BB#0:
   1770 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1771 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1772 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   1773 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1774 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
   1775 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1776 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1777 ; AVX1-NEXT:    retq
   1778 ;
   1779 ; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   1780 ; AVX2:       # BB#0:
   1781 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1782 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1783 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   1784 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1785 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
   1786 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1787 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1788 ; AVX2-NEXT:    retq
   1789   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
   1790   ret <16 x i16> %shuffle
   1791 }
   1792 
   1793 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1794 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   1795 ; AVX1:       # BB#0:
   1796 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1797 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1798 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
   1799 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1800 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1801 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1802 ; AVX1-NEXT:    retq
   1803 ;
   1804 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   1805 ; AVX2:       # BB#0:
   1806 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1807 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1808 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
   1809 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1810 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1811 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1812 ; AVX2-NEXT:    retq
   1813   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
   1814   ret <16 x i16> %shuffle
   1815 }
   1816 
   1817 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   1818 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   1819 ; AVX1:       # BB#0:
   1820 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1821 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1822 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1823 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
   1824 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1825 ; AVX1-NEXT:    retq
   1826 ;
   1827 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   1828 ; AVX2:       # BB#0:
   1829 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1830 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1831 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1832 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
   1833 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1834 ; AVX2-NEXT:    retq
   1835   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
   1836   ret <16 x i16> %shuffle
   1837 }
   1838 
   1839 define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   1840 ; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   1841 ; AVX1:       # BB#0:
   1842 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1843 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1844 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
   1845 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1846 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
   1847 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1848 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1849 ; AVX1-NEXT:    retq
   1850 ;
   1851 ; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   1852 ; AVX2:       # BB#0:
   1853 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1854 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1855 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
   1856 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1857 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
   1858 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1859 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1860 ; AVX2-NEXT:    retq
   1861   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
   1862   ret <16 x i16> %shuffle
   1863 }
   1864 
   1865 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
   1866 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   1867 ; AVX1:       # BB#0:
   1868 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1869 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1870 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1871 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
   1872 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   1873 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1874 ; AVX1-NEXT:    retq
   1875 ;
   1876 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   1877 ; AVX2:       # BB#0:
   1878 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1879 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1880 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1881 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
   1882 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   1883 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1884 ; AVX2-NEXT:    retq
   1885   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
   1886   ret <16 x i16> %shuffle
   1887 }
   1888 
   1889 define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
   1890 ; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   1891 ; AVX1:       # BB#0:
   1892 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1893 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
   1894 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   1895 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
   1896 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1897 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   1898 ; AVX1-NEXT:    retq
   1899 ;
   1900 ; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   1901 ; AVX2:       # BB#0:
   1902 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1903 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
   1904 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   1905 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
   1906 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1907 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   1908 ; AVX2-NEXT:    retq
   1909   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
   1910   ret <16 x i16> %shuffle
   1911 }
   1912 
   1913 define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
   1914 ; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   1915 ; AVX1:       # BB#0:
   1916 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1917 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1918 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
   1919 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1920 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
   1921 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1922 ; AVX1-NEXT:    retq
   1923 ;
   1924 ; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   1925 ; AVX2:       # BB#0:
   1926 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1927 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1928 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
   1929 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1930 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
   1931 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1932 ; AVX2-NEXT:    retq
   1933   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
   1934   ret <16 x i16> %shuffle
   1935 }
   1936 
   1937 define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
   1938 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   1939 ; AVX1:       # BB#0:
   1940 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1941 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1942 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
   1943 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1944 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
   1945 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1946 ; AVX1-NEXT:    retq
   1947 ;
   1948 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   1949 ; AVX2:       # BB#0:
   1950 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1951 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1952 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
   1953 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1954 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
   1955 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1956 ; AVX2-NEXT:    retq
   1957   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
   1958   ret <16 x i16> %shuffle
   1959 }
   1960 
   1961 define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
   1962 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   1963 ; AVX1:       # BB#0:
   1964 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1965 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1966 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
   1967 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1968 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
   1969 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1970 ; AVX1-NEXT:    retq
   1971 ;
   1972 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   1973 ; AVX2:       # BB#0:
   1974 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1975 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   1976 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
   1977 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1978 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
   1979 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1980 ; AVX2-NEXT:    retq
   1981   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
   1982   ret <16 x i16> %shuffle
   1983 }
   1984 
   1985 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
   1986 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   1987 ; AVX1:       # BB#0:
   1988 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1989 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1990 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
   1991 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1992 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
   1993 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1994 ; AVX1-NEXT:    retq
   1995 ;
   1996 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   1997 ; AVX2:       # BB#0:
   1998 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1999 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   2000 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
   2001 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2002 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
   2003 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2004 ; AVX2-NEXT:    retq
   2005   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
   2006   ret <16 x i16> %shuffle
   2007 }
   2008 
   2009 define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
   2010 ; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   2011 ; AVX1:       # BB#0:
   2012 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2013 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2014 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
   2015 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2016 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
   2017 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2018 ; AVX1-NEXT:    retq
   2019 ;
   2020 ; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   2021 ; AVX2:       # BB#0:
   2022 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2023 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2024 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
   2025 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2026 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
   2027 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2028 ; AVX2-NEXT:    retq
   2029   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
   2030   ret <16 x i16> %shuffle
   2031 }
   2032 
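; Both result halves want the same word permutation, so the byte-shuffle mask is loaded once (vmovdqa) and vpshufb is applied to each 128-bit half; only element 11 needs to be blended across from the high half first.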
   2033 define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2034 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2035 ; AVX1:       # BB#0:
   2036 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2037 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
   2038 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2039 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2040 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2041 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2042 ; AVX1-NEXT:    retq
   2043 ;
   2044 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2045 ; AVX2:       # BB#0:
   2046 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2047 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
   2048 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2049 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2050 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2051 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2052 ; AVX2-NEXT:    retq
   2053   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
   2054   ret <16 x i16> %shuffle
   2055 }
   2056 
   2057 define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2058 ; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2059 ; AVX1:       # BB#0:
   2060 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2061 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
   2062 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2063 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2064 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2065 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2066 ; AVX1-NEXT:    retq
   2067 ;
   2068 ; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2069 ; AVX2:       # BB#0:
   2070 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2071 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
   2072 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2073 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2074 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2075 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2076 ; AVX2-NEXT:    retq
   2077   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
   2078   ret <16 x i16> %shuffle
   2079 }
   2080 
   2081 define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
   2082 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2083 ; AVX1:       # BB#0:
   2084 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2085 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
   2086 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2087 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2088 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2089 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2090 ; AVX1-NEXT:    retq
   2091 ;
   2092 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2093 ; AVX2:       # BB#0:
   2094 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2095 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
   2096 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2097 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2098 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2099 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2100 ; AVX2-NEXT:    retq
   2101   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
   2102   ret <16 x i16> %shuffle
   2103 }
   2104 
   2105 define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
   2106 ; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2107 ; AVX1:       # BB#0:
   2108 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2109 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
   2110 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2111 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
   2112 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2113 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2114 ; AVX1-NEXT:    retq
   2115 ;
   2116 ; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2117 ; AVX2:       # BB#0:
   2118 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2119 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
   2120 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2121 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
   2122 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2123 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2124 ; AVX2-NEXT:    retq
   2125   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
   2126   ret <16 x i16> %shuffle
   2127 }
   2128 
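; Element 12 has to cross from the high half into word 7 of the low result half: vpsllq $48 moves it into the top word of a scratch register and vpblendw merges it in after the in-half vpshufb.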
   2129 define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2130 ; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2131 ; AVX1:       # BB#0:
   2132 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2133 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2134 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
   2135 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2136 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
   2137 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2138 ; AVX1-NEXT:    retq
   2139 ;
   2140 ; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2141 ; AVX2:       # BB#0:
   2142 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2143 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2144 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
   2145 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2146 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
   2147 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2148 ; AVX2-NEXT:    retq
   2149   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2150   ret <16 x i16> %shuffle
   2151 }
   2152 
   2153 define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2154 ; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2155 ; AVX1:       # BB#0:
   2156 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2157 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2158 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
   2159 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2160 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
   2161 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2162 ; AVX1-NEXT:    retq
   2163 ;
   2164 ; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2165 ; AVX2:       # BB#0:
   2166 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2167 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2168 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
   2169 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2170 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
   2171 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2172 ; AVX2-NEXT:    retq
   2173   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   2174   ret <16 x i16> %shuffle
   2175 }
   2176 
   2177 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2178 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2179 ; AVX1:       # BB#0:
   2180 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2181 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2182 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2183 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2184 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2185 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2186 ; AVX1-NEXT:    retq
   2187 ;
   2188 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2189 ; AVX2:       # BB#0:
   2190 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2191 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2192 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2193 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2194 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2195 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2196 ; AVX2-NEXT:    retq
   2197   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   2198   ret <16 x i16> %shuffle
   2199 }
   2200 
   2201 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   2202 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2203 ; AVX1:       # BB#0:
   2204 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2205 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2206 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
   2207 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2208 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
   2209 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2210 ; AVX1-NEXT:    retq
   2211 ;
   2212 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2213 ; AVX2:       # BB#0:
   2214 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2215 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   2216 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
   2217 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2218 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
   2219 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2220 ; AVX2-NEXT:    retq
   2221   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
   2222   ret <16 x i16> %shuffle
   2223 }
   2224 
   2225 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
   2226 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2227 ; AVX1:       # BB#0:
   2228 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2229 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
   2230 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
   2231 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2232 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
   2233 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
   2234 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2235 ; AVX1-NEXT:    retq
   2236 ;
   2237 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2238 ; AVX2:       # BB#0:
   2239 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2240 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
   2241 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
   2242 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2243 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
   2244 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
   2245 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2246 ; AVX2-NEXT:    retq
   2247   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
   2248   ret <16 x i16> %shuffle
   2249 }
   2250 
   2251 define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2252 ; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2253 ; AVX1:       # BB#0:
   2254 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2255 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2256 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
   2257 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2258 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
   2259 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2260 ; AVX1-NEXT:    retq
   2261 ;
   2262 ; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2263 ; AVX2:       # BB#0:
   2264 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2265 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2266 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
   2267 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2268 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
   2269 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2270 ; AVX2-NEXT:    retq
   2271   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2272   ret <16 x i16> %shuffle
   2273 }
   2274 
   2275 define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2276 ; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2277 ; AVX1:       # BB#0:
   2278 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2279 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2280 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2281 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2282 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2283 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2284 ; AVX1-NEXT:    retq
   2285 ;
   2286 ; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2287 ; AVX2:       # BB#0:
   2288 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2289 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2290 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2291 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2292 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2293 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2294 ; AVX2-NEXT:    retq
   2295   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
   2296   ret <16 x i16> %shuffle
   2297 }
   2298 
   2299 define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2300 ; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   2301 ; AVX1:       # BB#0:
   2302 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2303 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2304 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2305 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2306 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2307 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2308 ; AVX1-NEXT:    retq
   2309 ;
   2310 ; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   2311 ; AVX2:       # BB#0:
   2312 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2313 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2314 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2315 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2316 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2317 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2318 ; AVX2-NEXT:    retq
   2319   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   2320   ret <16 x i16> %shuffle
   2321 }
   2322 
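; With the upper four words of each half undef, the shuffle stays in-lane: AVX1 reuses one byte-shuffle mask for both halves, and AVX2 folds the whole pattern into a single 256-bit vpshufb.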
   2323 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2324 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   2325 ; AVX1:       # BB#0:
   2326 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2327 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
   2328 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2329 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2330 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2331 ; AVX1-NEXT:    retq
   2332 ;
   2333 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   2334 ; AVX2:       # BB#0:
   2335 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
   2336 ; AVX2-NEXT:    retq
   2337   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
   2338   ret <16 x i16> %shuffle
   2339 }
   2340 
   2341 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   2342 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   2343 ; AVX1:       # BB#0:
   2344 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2345 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
   2346 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2347 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
   2348 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2349 ; AVX1-NEXT:    retq
   2350 ;
   2351 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   2352 ; AVX2:       # BB#0:
   2353 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2354 ; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm2
   2355 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2356 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
   2357 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2358 ; AVX2-NEXT:    retq
   2359   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
   2360   ret <16 x i16> %shuffle
   2361 }
   2362 
   2363 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2364 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   2365 ; AVX1:       # BB#0:
   2366 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2367 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
   2368 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2369 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2370 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2371 ; AVX1-NEXT:    retq
   2372 ;
   2373 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   2374 ; AVX2:       # BB#0:
   2375 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
   2376 ; AVX2-NEXT:    retq
   2377   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   2378   ret <16 x i16> %shuffle
   2379 }
   2380 
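; The 'zz' lanes come from the zeroinitializer operand: AVX1 materializes zeros with vpxor and blends them in, while AVX2 folds them into zeroed bytes of a single 256-bit vpshufb.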
   2381 define <16 x i16> @shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a) {
   2382 ; AVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
   2383 ; AVX1:       # BB#0:
   2384 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
   2385 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   2386 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
   2387 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2388 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   2389 ; AVX1-NEXT:    retq
   2390 ;
   2391 ; AVX2-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
   2392 ; AVX2:       # BB#0:
   2393 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
   2394 ; AVX2-NEXT:    retq
   2395   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 2, i32 16, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2396   ret <16 x i16> %shuffle
   2397 }
   2398 
   2399 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   2400 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   2401 ; AVX1:       # BB#0:
   2402 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2403 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
   2404 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2405 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2406 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2407 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2408 ; AVX1-NEXT:    retq
   2409 ;
   2410 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   2411 ; AVX2:       # BB#0:
   2412 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2413 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
   2414 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2415 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2416 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2417 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2418 ; AVX2-NEXT:    retq
   2419   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
   2420   ret <16 x i16> %shuffle
   2421 }
   2422 
   2423 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
   2424 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   2425 ; AVX1:       # BB#0:
   2426 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2427 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
   2428 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2429 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
   2430 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2431 ; AVX1-NEXT:    retq
   2432 ;
   2433 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   2434 ; AVX2:       # BB#0:
   2435 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2436 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
   2437 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2438 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
   2439 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2440 ; AVX2-NEXT:    retq
   2441   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
   2442   ret <16 x i16> %shuffle
   2443 }
   2444 
   2445 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
   2446 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   2447 ; AVX1:       # BB#0:
   2448 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2449 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
   2450 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2451 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
   2452 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2453 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2454 ; AVX1-NEXT:    retq
   2455 ;
   2456 ; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   2457 ; AVX2:       # BB#0:
   2458 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2459 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
   2460 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2461 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
   2462 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2463 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2464 ; AVX2-NEXT:    retq
   2465   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
   2466   ret <16 x i16> %shuffle
   2467 }
   2468 
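; From here on the masks select from both %a and %b (indices 16 and above refer to %b), so the lowerings first combine the two sources with unpacks or blends before the per-half word shuffles.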
   2469 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
   2470 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   2471 ; AVX1:       # BB#0:
   2472 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2473 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2474 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2475 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2476 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2477 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2478 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2479 ; AVX1-NEXT:    retq
   2480 ;
   2481 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   2482 ; AVX2:       # BB#0:
   2483 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2484 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2485 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2486 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
   2487 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
   2488 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
   2489 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
   2490 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2491 ; AVX2-NEXT:    retq
   2492   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   2493   ret <16 x i16> %shuffle
   2494 }
   2495 
   2496 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
   2497 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   2498 ; AVX1:       # BB#0:
   2499 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2500 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2501 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
   2502 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
   2503 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
   2504 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2505 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2506 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2507 ; AVX1-NEXT:    retq
   2508 ;
   2509 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   2510 ; AVX2:       # BB#0:
   2511 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2512 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
   2513 ; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm2
   2514 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2515 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
   2516 ; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   2517 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
   2518 ; AVX2-NEXT:    retq
   2519   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
   2520   ret <16 x i16> %shuffle
   2521 }
   2522 
   2523 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   2524 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   2525 ; AVX1:       # BB#0:
   2526 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2527 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2528 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2529 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2530 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2531 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2532 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2533 ; AVX1-NEXT:    retq
   2534 ;
   2535 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   2536 ; AVX2:       # BB#0:
   2537 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2538 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2539 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2540 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
   2541 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
   2542 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
   2543 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
   2544 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2545 ; AVX2-NEXT:    retq
   2546   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   2547   ret <16 x i16> %shuffle
   2548 }
   2549 
   2550 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
   2551 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   2552 ; AVX1:       # BB#0:
   2553 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2554 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2555 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   2556 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2557 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2558 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2559 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2560 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2561 ; AVX1-NEXT:    retq
   2562 ;
   2563 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   2564 ; AVX2:       # BB#0:
   2565 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   2566 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2567 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2568 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
   2569 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2570 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2571 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2572 ; AVX2-NEXT:    retq
   2573   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
   2574   ret <16 x i16> %shuffle
   2575 }
   2576 
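; On AVX2 the %b contribution is assembled per 128-bit half and widened with vinserti128, while %a is shuffled in place as a full ymm and the two sides are merged with a 256-bit vpblendw.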
   2577 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   2578 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   2579 ; AVX1:       # BB#0:
   2580 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2581 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
   2582 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2583 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   2584 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   2585 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
   2586 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
   2587 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
   2588 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
   2589 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   2590 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2591 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2592 ; AVX1-NEXT:    retq
   2593 ;
   2594 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   2595 ; AVX2:       # BB#0:
   2596 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2597 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
   2598 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
   2599 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
   2600 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
   2601 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
   2602 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2603 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   2604 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   2605 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2606 ; AVX2-NEXT:    retq
   2607   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
   2608   ret <16 x i16> %shuffle
   2609 }
   2610 
   2611 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
   2612 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   2613 ; AVX1:       # BB#0:
   2614 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2615 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
   2616 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2617 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   2618 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   2619 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
   2620 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
   2621 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   2622 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2623 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2624 ; AVX1-NEXT:    retq
   2625 ;
   2626 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   2627 ; AVX2:       # BB#0:
   2628 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2629 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3]
   2630 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
   2631 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
   2632 ; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
   2633 ; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm1
   2634 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   2635 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   2636 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2637 ; AVX2-NEXT:    retq
   2638   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
   2639   ret <16 x i16> %shuffle
   2640 }
   2641 
   2642 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
   2643 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   2644 ; AVX1:       # BB#0:
   2645 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2646 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2647 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
   2648 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
   2649 ; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
   2650 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2651 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2652 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   2653 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   2654 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   2655 ; AVX1-NEXT:    retq
   2656 ;
   2657 ; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   2658 ; AVX2:       # BB#0:
   2659 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2660 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2661 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
   2662 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
   2663 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
   2664 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2665 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
   2666 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   2667 ; AVX2-NEXT:    retq
   2668   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
   2669   ret <16 x i16> %shuffle
   2670 }
   2671 
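; Almost-unpack patterns: these masks match vpunpcklwd/vpunpckhwd of %b and %a except for a single
; out-of-pattern index (11 or 15), which appears to be what keeps them from lowering to one unpack.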
   2672 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
   2673 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   2674 ; AVX1:       # BB#0:
   2675 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2676 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2677 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2678 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2679 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2680 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   2681 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2682 ; AVX1-NEXT:    retq
   2683 ;
   2684 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   2685 ; AVX2:       # BB#0:
   2686 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
   2687 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2688 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2689 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2690 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2691 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2692 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
   2693 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
   2694 ; AVX2-NEXT:    retq
   2695   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
   2696   ret <16 x i16> %shuffle
   2697 }
   2698 
   2699 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
   2700 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   2701 ; AVX1:       # BB#0:
   2702 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2703 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2704 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2705 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2706 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2707 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   2708 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2709 ; AVX1-NEXT:    retq
   2710 ;
   2711 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   2712 ; AVX2:       # BB#0:
   2713 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
   2714 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2715 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2716 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2717 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2718 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2719 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
   2720 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
   2721 ; AVX2-NEXT:    retq
   2722   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
   2723   ret <16 x i16> %shuffle
   2724 }
   2725 
   2726 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
   2727 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   2728 ; AVX1:       # BB#0:
   2729 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2730 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
   2731 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2732 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
   2733 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
   2734 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
   2735 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
   2736 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
   2737 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   2738 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   2739 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2740 ; AVX1-NEXT:    retq
   2741 ;
   2742 ; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   2743 ; AVX2:       # BB#0:
   2744 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2745 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2746 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   2747 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
   2748 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2749 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
   2750 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
   2751 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2752 ; AVX2-NEXT:    retq
   2753   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
   2754   ret <16 x i16> %shuffle
   2755 }
   2756 
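; Masks with undef (uu) elements leave the lowering free to put anything in those lanes, so cheaper
; in-lane shuffles and blends are acceptable for the tests that follow.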
   2757 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2758 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   2759 ; AVX1:       # BB#0:
   2760 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   2761 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
   2762 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
   2763 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2764 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2765 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   2766 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   2767 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   2768 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   2769 ; AVX1-NEXT:    retq
   2770 ;
   2771 ; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   2772 ; AVX2:       # BB#0:
   2773 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
   2774 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
   2775 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
   2776 ; AVX2-NEXT:    retq
   2777   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
   2778   ret <16 x i16> %shuffle
   2779 }
   2780 
   2781 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2782 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   2783 ; AVX1:       # BB#0:
   2784 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2785 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2786 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
   2787 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   2788 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   2789 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   2790 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
   2791 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2792 ; AVX1-NEXT:    retq
   2793 ;
   2794 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   2795 ; AVX2:       # BB#0:
   2796 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2797 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
   2798 ; AVX2-NEXT:    retq
   2799   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2800   ret <16 x i16> %shuffle
   2801 }
   2802 
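; Only one element per 128-bit half is defined here, so the whole mask should collapse to a single
; in-lane dword shuffle of %b (vpermilps on AVX1, vpshufd on AVX2).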
   2803 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2804 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
   2805 ; AVX1:       # BB#0:
   2806 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
   2807 ; AVX1-NEXT:    retq
   2808 ;
   2809 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
   2810 ; AVX2:       # BB#0:
   2811 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
   2812 ; AVX2-NEXT:    retq
   2813   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2814   ret <16 x i16> %shuffle
   2815 }
   2816 
   2817 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2818 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   2819 ; AVX1:       # BB#0:
   2820 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2821 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2822 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
   2823 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
   2824 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   2825 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2826 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2827 ; AVX1-NEXT:    retq
   2828 ;
   2829 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   2830 ; AVX2:       # BB#0:
   2831 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
   2832 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
   2833 ; AVX2-NEXT:    retq
   2834   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2835   ret <16 x i16> %shuffle
   2836 }
   2837 
   2838 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
   2839 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   2840 ; AVX1:       # BB#0:
   2841 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2842 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2843 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2844 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
   2845 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   2846 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2847 ; AVX1-NEXT:    retq
   2848 ;
   2849 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   2850 ; AVX2:       # BB#0:
   2851 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
   2852 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
   2853 ; AVX2-NEXT:    retq
   2854   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
   2855   ret <16 x i16> %shuffle
   2856 }
   2857 
   2858 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2859 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   2860 ; AVX1:       # BB#0:
   2861 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2862 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2863 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   2864 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
   2865 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   2866 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   2867 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2868 ; AVX1-NEXT:    retq
   2869 ;
   2870 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   2871 ; AVX2:       # BB#0:
   2872 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
   2873 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
   2874 ; AVX2-NEXT:    retq
   2875   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   2876   ret <16 x i16> %shuffle
   2877 }
   2878 
   2879 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
   2880 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   2881 ; AVX1:       # BB#0:
   2882 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2883 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
   2884 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
   2885 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
   2886 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2887 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
   2888 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   2889 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2890 ; AVX1-NEXT:    retq
   2891 ;
   2892 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   2893 ; AVX2:       # BB#0:
   2894 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2895 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2896 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2897 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   2898 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2899 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2900 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2901 ; AVX2-NEXT:    retq
   2902   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
   2903   ret <16 x i16> %shuffle
   2904 }
   2905 
   2906 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
   2907 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   2908 ; AVX1:       # BB#0:
   2909 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2910 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2911 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
   2912 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   2913 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   2914 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2915 ; AVX1-NEXT:    retq
   2916 ;
   2917 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   2918 ; AVX2:       # BB#0:
   2919 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
   2920 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
   2921 ; AVX2-NEXT:    retq
   2922   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
   2923   ret <16 x i16> %shuffle
   2924 }
   2925 
   2926 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
   2927 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   2928 ; AVX1:       # BB#0:
   2929 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2930 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   2931 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2932 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
   2933 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
   2934 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
   2935 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   2936 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   2937 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2938 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2939 ; AVX1-NEXT:    retq
   2940 ;
   2941 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   2942 ; AVX2:       # BB#0:
   2943 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
   2944 ; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
   2945 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
   2946 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
   2947 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
   2948 ; AVX2-NEXT:    retq
   2949   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
   2950   ret <16 x i16> %shuffle
   2951 }
   2952 
   2953 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
   2954 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   2955 ; AVX1:       # BB#0:
   2956 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2957 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   2958 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   2959 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2960 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
   2961 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
   2962 ; AVX1-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
   2963 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   2964 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   2965 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2966 ; AVX1-NEXT:    retq
   2967 ;
   2968 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   2969 ; AVX2:       # BB#0:
   2970 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
   2971 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
   2972 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2973 ; AVX2-NEXT:    retq
   2974   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
   2975   ret <16 x i16> %shuffle
   2976 }
   2977 
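; Rotate-like masks: contiguous runs that wrap around a 128-bit lane map onto per-lane byte rotates
; (vpalignr, or vpslldq/vpsrldq when zeros may be shifted in). A stray cross-lane index, e.g. the 12
; below, seems to cost an extra extract/blend before the rotate.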
   2978 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
   2979 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   2980 ; AVX1:       # BB#0:
   2981 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2982 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2983 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   2984 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   2985 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   2986 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   2987 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
   2988 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2989 ; AVX1-NEXT:    retq
   2990 ;
   2991 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   2992 ; AVX2:       # BB#0:
   2993 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
   2994 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2995 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   2996 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2997 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2998 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2999 ; AVX2-NEXT:    retq
   3000   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
   3001   ret <16 x i16> %shuffle
   3002 }
   3003 
   3004 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3005 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   3006 ; AVX1:       # BB#0:
   3007 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3008 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3009 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3010 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   3011 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3012 ; AVX1-NEXT:    retq
   3013 ;
   3014 ; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   3015 ; AVX2:       # BB#0:
   3016 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
   3017 ; AVX2-NEXT:    retq
   3018   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3019   ret <16 x i16> %shuffle
   3020 }
   3021 
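; Single-source variants of the same rotates: AVX1 and AVX2 end up with essentially the same sequence
; (only the *f128/*i128 extract/insert forms differ), since the rotate itself is a 128-bit vpalignr
; either way.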
   3022 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
   3023 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   3024 ; AVX1:       # BB#0:
   3025 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3026 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3027 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3028 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3029 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3030 ; AVX1-NEXT:    retq
   3031 ;
   3032 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   3033 ; AVX2:       # BB#0:
   3034 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3035 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3036 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3037 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3038 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3039 ; AVX2-NEXT:    retq
   3040   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
   3041   ret <16 x i16> %shuffle
   3042 }
   3043 
   3044 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3045 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3046 ; AVX1:       # BB#0:
   3047 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3048 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3049 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3050 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3051 ; AVX1-NEXT:    retq
   3052 ;
   3053 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3054 ; AVX2:       # BB#0:
   3055 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
   3056 ; AVX2-NEXT:    retq
   3057   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3058   ret <16 x i16> %shuffle
   3059 }
   3060 
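; Because the elements that get shifted in are undef in the mask, a plain byte shift that brings in
; zeros (vpslldq) is a valid lowering here.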
   3061 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3062 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3063 ; AVX1:       # BB#0:
   3064 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3065 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3066 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3067 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3068 ; AVX1-NEXT:    retq
   3069 ;
   3070 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3071 ; AVX2:       # BB#0:
   3072 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
   3073 ; AVX2-NEXT:    retq
   3074   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3075   ret <16 x i16> %shuffle
   3076 }
   3077 
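; Two-source rotates: runs such as 19..23,0,1 concatenate the tail of %b with the head of %a, which
; vpalignr can express directly; the out-of-pattern index 10 again appears to need an extra blend.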
   3078 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
   3079 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3080 ; AVX1:       # BB#0:
   3081 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3082 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3083 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3084 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
   3085 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   3086 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3087 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
   3088 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3089 ; AVX1-NEXT:    retq
   3090 ;
   3091 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3092 ; AVX2:       # BB#0:
   3093 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
   3094 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3095 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3096 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3097 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3098 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3099 ; AVX2-NEXT:    retq
   3100   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
   3101   ret <16 x i16> %shuffle
   3102 }
   3103 
   3104 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
   3105 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   3106 ; AVX1:       # BB#0:
   3107 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3108 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3109 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3110 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   3111 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3112 ; AVX1-NEXT:    retq
   3113 ;
   3114 ; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   3115 ; AVX2:       # BB#0:
   3116 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
   3117 ; AVX2-NEXT:    retq
   3118   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
   3119   ret <16 x i16> %shuffle
   3120 }
   3121 
   3122 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
   3123 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   3124 ; AVX1:       # BB#0:
   3125 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3126 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3127 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3128 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3129 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3130 ; AVX1-NEXT:    retq
   3131 ;
   3132 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   3133 ; AVX2:       # BB#0:
   3134 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3135 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3136 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3137 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3138 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3139 ; AVX2-NEXT:    retq
   3140   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
   3141   ret <16 x i16> %shuffle
   3142 }
   3143 
   3144 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
   3145 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   3146 ; AVX1:       # BB#0:
   3147 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3148 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3149 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3150 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3151 ; AVX1-NEXT:    retq
   3152 ;
   3153 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   3154 ; AVX2:       # BB#0:
   3155 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
   3156 ; AVX2-NEXT:    retq
   3157   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
   3158   ret <16 x i16> %shuffle
   3159 }
   3160 
   3161 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3162 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   3163 ; AVX1:       # BB#0:
   3164 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3165 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3166 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3167 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3168 ; AVX1-NEXT:    retq
   3169 ;
   3170 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   3171 ; AVX2:       # BB#0:
   3172 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
   3173 ; AVX2-NEXT:    retq
   3174   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
   3175   ret <16 x i16> %shuffle
   3176 }
   3177 
   3178 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
   3179 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   3180 ; AVX1:       # BB#0:
   3181 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3182 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3183 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3184 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   3185 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   3186 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3187 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
   3188 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3189 ; AVX1-NEXT:    retq
   3190 ;
   3191 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   3192 ; AVX2:       # BB#0:
   3193 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
   3194 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3195 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3196 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3197 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3198 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3199 ; AVX2-NEXT:    retq
   3200   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
   3201   ret <16 x i16> %shuffle
   3202 }
   3203 
   3204 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
   3205 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   3206 ; AVX1:       # BB#0:
   3207 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3208 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3209 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3210 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   3211 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3212 ; AVX1-NEXT:    retq
   3213 ;
   3214 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   3215 ; AVX2:       # BB#0:
   3216 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
   3217 ; AVX2-NEXT:    retq
   3218   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
   3219   ret <16 x i16> %shuffle
   3220 }
   3221 
   3222 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
   3223 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   3224 ; AVX1:       # BB#0:
   3225 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3226 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3227 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3228 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
   3229 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
   3230 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   3231 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7]
   3232 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3233 ; AVX1-NEXT:    retq
   3234 ;
   3235 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   3236 ; AVX2:       # BB#0:
   3237 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
   3238 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3239 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3240 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3241 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3242 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3243 ; AVX2-NEXT:    retq
   3244   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
   3245   ret <16 x i16> %shuffle
   3246 }
   3247 
   3248 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
   3249 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   3250 ; AVX1:       # BB#0:
   3251 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3252 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3253 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3254 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   3255 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3256 ; AVX1-NEXT:    retq
   3257 ;
   3258 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   3259 ; AVX2:       # BB#0:
   3260 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
   3261 ; AVX2-NEXT:    retq
   3262   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
   3263   ret <16 x i16> %shuffle
   3264 }
   3265 
   3266 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
   3267 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   3268 ; AVX1:       # BB#0:
   3269 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3270 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   3271 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3272 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
   3273 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   3274 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   3275 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   3276 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   3277 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3278 ; AVX1-NEXT:    retq
   3279 ;
   3280 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   3281 ; AVX2:       # BB#0:
   3282 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
   3283 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
   3284 ; AVX2-NEXT:    retq
   3285   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
   3286   ret <16 x i16> %shuffle
   3287 }
   3288 
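; Only the upper half of the result is defined, so a single 128-bit vpunpcklwd plus an insert into the
; high lane should suffice; the low lane of the result is a don't-care.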
   3289 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) {
   3290 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
   3291 ; AVX1:       # BB#0:
   3292 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   3293 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3294 ; AVX1-NEXT:    retq
   3295 ;
   3296 ; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
   3297 ; AVX2:       # BB#0:
   3298 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   3299 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   3300 ; AVX2-NEXT:    retq
   3301   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
   3302   ret <16 x i16> %shuffle
   3303 }
   3304 
   3305 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
   3306 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   3307 ; AVX1:       # BB#0:
   3308 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
   3309 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3310 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3311 ; AVX1-NEXT:    retq
   3312 ;
   3313 ; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   3314 ; AVX2:       # BB#0:
   3315 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
   3316 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3317 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   3318 ; AVX2-NEXT:    retq
   3319   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   3320   ret <16 x i16> %shuffle
   3321 }
   3322 
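; Splat of element 8: the source word lives in the upper 128-bit lane, so both targets extract it first.
; AVX2 can then use vpbroadcastw, while AVX1 presumably has to synthesize the broadcast from
; vpshuflw+vpshufd and reinsert.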
   3323 define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) {
   3324 ; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
   3325 ; AVX1:       # BB#0:
   3326 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3327 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   3328 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3329 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3330 ; AVX1-NEXT:    retq
   3331 ;
   3332 ; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
   3333 ; AVX2:       # BB#0:
   3334 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   3335 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
   3336 ; AVX2-NEXT:    retq
   3337   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   3338   ret <16 x i16> %shuffle
   3339 }
   3340 
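; With the entire upper half of the result undef, these cases should reduce to plain 128-bit code with
; no reassembly into a ymm register.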
   3341 define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
   3342 ; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u:
   3343 ; ALL:       # BB#0:
   3344 ; ALL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   3345 ; ALL-NEXT:    retq
   3346   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   3347   ret <16 x i16> %shuffle
   3348 }
   3349 
   3350 define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
   3351 ; ALL-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
   3352 ; ALL:       # BB#0:
   3353 ; ALL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
   3354 ; ALL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3355 ; ALL-NEXT:    retq
   3356   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   3357   ret <16 x i16> %shuffle
   3358 }
   3359 
   3360 define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
   3361 ; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   3362 ; AVX1:       # BB#0:
   3363 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3364 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
   3365 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3366 ; AVX1-NEXT:    retq
   3367 ;
   3368 ; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   3369 ; AVX2:       # BB#0:
   3370 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   3371 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
   3372 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3373 ; AVX2-NEXT:    retq
   3374   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   3375   ret <16 x i16> %shuffle
   3376 }
   3377 
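; Insert into an all-zero vector: movzwl+vmovd already leave every other lane zero (the VEX-encoded
; vmovd zeroes the rest of the destination register), so no explicit blend with zero is expected.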
   3378 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
   3379 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
   3380 ; ALL:       # BB#0:
   3381 ; ALL-NEXT:    movzwl (%rdi), %eax
   3382 ; ALL-NEXT:    vmovd %eax, %xmm0
   3383 ; ALL-NEXT:    retq
   3384   %val = load i16, i16* %ptr
   3385   %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
   3386   ret <16 x i16> %i0
   3387 }
   3388 
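; Concatenations of whole 128-bit halves should lower to a single lane blend (vblendpd/vpblendd) or
; vperm2f128, regardless of the i16 element type.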
   3389 define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
   3390 ; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
   3391 ; AVX1:       # BB#0:
   3392 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
   3393 ; AVX1-NEXT:    retq
   3394 ;
   3395 ; AVX2-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
   3396 ; AVX2:       # BB#0:
   3397 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   3398 ; AVX2-NEXT:    retq
   3399   %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   3400   %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   3401   %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   3402   ret <16 x i16> %shuf
   3403 }
   3404 
   3405 define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
   3406 ; ALL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
   3407 ; ALL:       # BB#0:
   3408 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   3409 ; ALL-NEXT:    retq
   3410   %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   3411   %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   3412   %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
   3413   %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
   3414   %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   3415   %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
   3416   ret <16 x i16> %shuffle16
   3417 }
   3418 
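; PR24935: a fully irregular 16-element mask. On AVX2 the lowering is expected to fall back to variable
; byte shuffles (vpshufb) merged with vpblendvb constant masks; AVX1 uses a longer chain of 128-bit
; shuffles and word blends.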
   3419 define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) {
   3420 ; AVX1-LABEL: PR24935:
   3421 ; AVX1:       # BB#0:
   3422 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
   3423 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3424 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm4 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   3425 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5,6,7]
   3426 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   3427 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm5 = xmm4[0,1,2,3,5,5,6,7]
   3428 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm6 = xmm0[2,3,2,3,4,5,6,7,8,9,8,9,0,1,2,3]
   3429 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6,7]
   3430 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3],xmm5[4,5,6],xmm2[7]
   3431 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
   3432 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   3433 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
   3434 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[6,7,4,5,4,5,10,11,4,5,14,15,12,13,0,1]
   3435 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7]
   3436 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3437 ; AVX1-NEXT:    retq
   3438 ;
   3439 ; AVX2-LABEL: PR24935:
   3440 ; AVX2:       # BB#0:
   3441 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3442 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23]
   3443 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17]
   3444 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0>
   3445 ; AVX2-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
   3446 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
   3447 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
   3448 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   3449 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,6,7,8,9,10,11,13,13,14,15]
   3450 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1,2],ymm2[3],ymm0[4],ymm2[5,6,7,8],ymm0[9,10],ymm2[11],ymm0[12],ymm2[13,14,15]
   3451 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
   3452 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   3453 ; AVX2-NEXT:    retq
   3454   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24>
   3455   ret <16 x i16> %shuffle
   3456 }
   3457 
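; Broadcast-from-memory tests: AVX2 should fold the load straight into vpbroadcastw (including from
; offset 2(%rdi) when word 1 or 3 of the loaded i32 is splatted); the sext case goes through a GPR
; first. AVX1 loads into an xmm and splats with vpshuflw/vpshufd/vpshufb.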
   3458 define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
   3459 ; AVX1-LABEL: insert_dup_mem_v16i16_i32:
   3460 ; AVX1:       # BB#0:
   3461 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   3462 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   3463 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3464 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3465 ; AVX1-NEXT:    retq
   3466 ;
   3467 ; AVX2-LABEL: insert_dup_mem_v16i16_i32:
   3468 ; AVX2:       # BB#0:
   3469 ; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
   3470 ; AVX2-NEXT:    retq
   3471   %tmp = load i32, i32* %ptr, align 4
   3472   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   3473   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   3474   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
   3475   ret <16 x i16> %tmp3
   3476 }
   3477 
   3478 define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
   3479 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
   3480 ; AVX1:       # BB#0:
   3481 ; AVX1-NEXT:    movswl (%rdi), %eax
   3482 ; AVX1-NEXT:    vmovd %eax, %xmm0
   3483 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   3484 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3485 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3486 ; AVX1-NEXT:    retq
   3487 ;
   3488 ; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
   3489 ; AVX2:       # BB#0:
   3490 ; AVX2-NEXT:    movswl (%rdi), %eax
   3491 ; AVX2-NEXT:    vmovd %eax, %xmm0
   3492 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
   3493 ; AVX2-NEXT:    retq
   3494   %tmp = load i16, i16* %ptr, align 2
   3495   %tmp1 = sext i16 %tmp to i32
   3496   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   3497   %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
   3498   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
   3499   ret <16 x i16> %tmp4
   3500 }
   3501 
   3502 define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
   3503 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
   3504 ; AVX1:       # BB#0:
   3505 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   3506 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
   3507 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
   3508 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3509 ; AVX1-NEXT:    retq
   3510 ;
   3511 ; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
   3512 ; AVX2:       # BB#0:
   3513 ; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
   3514 ; AVX2-NEXT:    retq
   3515   %tmp = load i32, i32* %ptr, align 4
   3516   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   3517   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   3518   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   3519   ret <16 x i16> %tmp3
   3520 }
   3521 
   3522 define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
   3523 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
   3524 ; AVX1:       # BB#0:
   3525 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   3526 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   3527 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3528 ; AVX1-NEXT:    retq
   3529 ;
   3530 ; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
   3531 ; AVX2:       # BB#0:
   3532 ; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
   3533 ; AVX2-NEXT:    retq
   3534   %tmp = load i32, i32* %ptr, align 4
   3535   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
   3536   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   3537   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   3538   ret <16 x i16> %tmp3
   3539 }