Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
      2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
      3 
      4 target triple = "x86_64-unknown-unknown"
      5 
      6 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
      7 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
      8 ; AVX1:       # BB#0:
      9 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     10 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     11 ; AVX1-NEXT:    retq
     12 ;
     13 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     14 ; AVX2:       # BB#0:
     15 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
     16 ; AVX2-NEXT:    retq
     17   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     18   ret <16 x i16> %shuffle
     19 }
     20 
     21 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
     22 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     23 ; AVX1:       # BB#0:
     24 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
     25 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     26 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
     27 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
     28 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     29 ; AVX1-NEXT:    retq
     30 ;
     31 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     32 ; AVX2:       # BB#0:
     33 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     34 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
     35 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     36 ; AVX2-NEXT:    retq
     37   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     38   ret <16 x i16> %shuffle
     39 }
     40 
     41 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
     42 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     43 ; AVX1:       # BB#0:
     44 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     45 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
     46 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     47 ; AVX1-NEXT:    retq
     48 ;
     49 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     50 ; AVX2:       # BB#0:
     51 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     52 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
     53 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     54 ; AVX2-NEXT:    retq
     55   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     56   ret <16 x i16> %shuffle
     57 }
     58 
     59 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
     60 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
     61 ; AVX1:       # BB#0:
     62 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     63 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
     64 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     65 ; AVX1-NEXT:    retq
     66 ;
     67 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
     68 ; AVX2:       # BB#0:
     69 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     70 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
     71 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     72 ; AVX2-NEXT:    retq
     73   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
     74   ret <16 x i16> %shuffle
     75 }
     76 
     77 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
     78 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
     79 ; AVX1:       # BB#0:
     80 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     81 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
     82 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     83 ; AVX1-NEXT:    retq
     84 ;
     85 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
     86 ; AVX2:       # BB#0:
     87 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     88 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
     89 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     90 ; AVX2-NEXT:    retq
     91   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
     92   ret <16 x i16> %shuffle
     93 }
     94 
     95 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
     96 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
     97 ; AVX1:       # BB#0:
     98 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     99 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    100 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    101 ; AVX1-NEXT:    retq
    102 ;
    103 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
    104 ; AVX2:       # BB#0:
    105 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
    106 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    107 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    108 ; AVX2-NEXT:    retq
    109   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    110   ret <16 x i16> %shuffle
    111 }
    112 
    113 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    114 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    115 ; AVX1:       # BB#0:
    116 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    117 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    118 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    119 ; AVX1-NEXT:    retq
    120 ;
    121 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    122 ; AVX2:       # BB#0:
    123 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
    124 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    125 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    126 ; AVX2-NEXT:    retq
    127   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    128   ret <16 x i16> %shuffle
    129 }
    130 
    131 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    132 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    133 ; AVX1:       # BB#0:
    134 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    135 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    136 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    137 ; AVX1-NEXT:    retq
    138 ;
    139 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    140 ; AVX2:       # BB#0:
    141 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
    142 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    143 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    144 ; AVX2-NEXT:    retq
    145   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    146   ret <16 x i16> %shuffle
    147 }
    148 
    149 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    150 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    151 ; AVX1:       # BB#0:
    152 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    153 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    154 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
    155 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    156 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    157 ; AVX1-NEXT:    retq
    158 ;
    159 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    160 ; AVX2:       # BB#0:
    161 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    162 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    163 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
    164 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
    165 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
    166 ; AVX2-NEXT:    retq
    167   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    168   ret <16 x i16> %shuffle
    169 }
    170 
    171 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    172 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
    173 ; AVX1:       # BB#0:
    174 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    175 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    176 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
    177 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    178 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    179 ; AVX1-NEXT:    retq
    180 ;
    181 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
    182 ; AVX2:       # BB#0:
    183 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    184 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
    185 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
    186 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    187 ; AVX2-NEXT:    retq
    188   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    189   ret <16 x i16> %shuffle
    190 }
    191 
    192 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    193 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    194 ; AVX1:       # BB#0:
    195 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    196 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    197 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
    198 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    199 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    200 ; AVX1-NEXT:    retq
    201 ;
    202 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    203 ; AVX2:       # BB#0:
    204 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    205 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    206 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    207 ; AVX2-NEXT:    retq
    208   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    209   ret <16 x i16> %shuffle
    210 }
    211 
    212 define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    213 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    214 ; AVX1:       # BB#0:
    215 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    216 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    217 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
    218 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    219 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    220 ; AVX1-NEXT:    retq
    221 ;
    222 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    223 ; AVX2:       # BB#0:
    224 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    225 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    226 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    227 ; AVX2-NEXT:    retq
    228   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    229   ret <16 x i16> %shuffle
    230 }
    231 
    232 define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    233 ; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
    234 ; AVX1:       # BB#0:
    235 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    236 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    237 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    238 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    239 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    240 ; AVX1-NEXT:    retq
    241 ;
    242 ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
    243 ; AVX2:       # BB#0:
    244 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    245 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    246 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    247 ; AVX2-NEXT:    retq
    248   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    249   ret <16 x i16> %shuffle
    250 }
    251 
    252 define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    253 ; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
    254 ; AVX1:       # BB#0:
    255 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    256 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    257 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    258 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    259 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    260 ; AVX1-NEXT:    retq
    261 ;
    262 ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
    263 ; AVX2:       # BB#0:
    264 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    265 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    266 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    267 ; AVX2-NEXT:    retq
    268   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    269   ret <16 x i16> %shuffle
    270 }
    271 
    272 define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    273 ; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    274 ; AVX1:       # BB#0:
    275 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    276 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    277 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    278 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    279 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    280 ; AVX1-NEXT:    retq
    281 ;
    282 ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    283 ; AVX2:       # BB#0:
    284 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    285 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    286 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    287 ; AVX2-NEXT:    retq
    288   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    289   ret <16 x i16> %shuffle
    290 }
    291 
    292 define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    293 ; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    294 ; AVX1:       # BB#0:
    295 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    296 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    297 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    298 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    299 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    300 ; AVX1-NEXT:    retq
    301 ;
    302 ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    303 ; AVX2:       # BB#0:
    304 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    305 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    306 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    307 ; AVX2-NEXT:    retq
    308   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    309   ret <16 x i16> %shuffle
    310 }
    311 
    312 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    313 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    314 ; AVX1:       # BB#0:
    315 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    316 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    317 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    318 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    319 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    320 ; AVX1-NEXT:    retq
    321 ;
    322 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    323 ; AVX2:       # BB#0:
    324 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    325 ; AVX2-NEXT:    retq
    326   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    327   ret <16 x i16> %shuffle
    328 }
    329 
    330 define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
    331 ; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    332 ; AVX1:       # BB#0:
    333 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    334 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
    335 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    336 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    337 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    338 ; AVX1-NEXT:    retq
    339 ;
    340 ; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    341 ; AVX2:       # BB#0:
    342 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
    343 ; AVX2-NEXT:    retq
    344   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
    345   ret <16 x i16> %shuffle
    346 }
    347 
    348 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
    349 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    350 ; AVX1:       # BB#0:
    351 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
    352 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
    353 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    354 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    355 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    356 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    357 ; AVX1-NEXT:    retq
    358 ;
    359 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    360 ; AVX2:       # BB#0:
    361 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
    362 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
    363 ; AVX2-NEXT:    retq
    364   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
    365   ret <16 x i16> %shuffle
    366 }
    367 
    368 define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
    369 ; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    370 ; AVX1:       # BB#0:
    371 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
    372 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
    373 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    374 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
    375 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
    376 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    377 ; AVX1-NEXT:    retq
    378 ;
    379 ; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    380 ; AVX2:       # BB#0:
    381 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
    382 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
    383 ; AVX2-NEXT:    retq
    384   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
    385   ret <16 x i16> %shuffle
    386 }
    387 
    388 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
    389 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    390 ; AVX1:       # BB#0:
    391 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
    392 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
    393 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    394 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
    395 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
    396 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    397 ; AVX1-NEXT:    retq
    398 ;
    399 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    400 ; AVX2:       # BB#0:
    401 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
    402 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
    403 ; AVX2-NEXT:    retq
    404   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
    405   ret <16 x i16> %shuffle
    406 }
    407 
    408 define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
    409 ; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    410 ; AVX1:       # BB#0:
    411 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
    412 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
    413 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    414 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
    415 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
    416 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    417 ; AVX1-NEXT:    retq
    418 ;
    419 ; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    420 ; AVX2:       # BB#0:
    421 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
    422 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
    423 ; AVX2-NEXT:    retq
    424   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
    425   ret <16 x i16> %shuffle
    426 }
    427 
    428 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
    429 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    430 ; AVX1:       # BB#0:
    431 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    432 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    433 ; AVX1-NEXT:    retq
    434 ;
    435 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    436 ; AVX2:       # BB#0:
    437 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    438 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    439 ; AVX2-NEXT:    retq
    440   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
    441   ret <16 x i16> %shuffle
    442 }
    443 
    444 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
    445 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    446 ; AVX1:       # BB#0:
    447 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    448 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    449 ; AVX1-NEXT:    retq
    450 ;
    451 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    452 ; AVX2:       # BB#0:
    453 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    454 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    455 ; AVX2-NEXT:    retq
    456   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
    457   ret <16 x i16> %shuffle
    458 }
    459 
    460 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    461 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    462 ; AVX1:       # BB#0:
    463 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    464 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    465 ; AVX1-NEXT:    retq
    466 ;
    467 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    468 ; AVX2:       # BB#0:
    469 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    470 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    471 ; AVX2-NEXT:    retq
    472   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    473   ret <16 x i16> %shuffle
    474 }
    475 
    476 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    477 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
    478 ; AVX1:       # BB#0:
    479 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    480 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    481 ; AVX1-NEXT:    retq
    482 ;
    483 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
    484 ; AVX2:       # BB#0:
    485 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    486 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    487 ; AVX2-NEXT:    retq
    488   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    489   ret <16 x i16> %shuffle
    490 }
    491 
    492 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    493 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
    494 ; AVX1:       # BB#0:
    495 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    496 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    497 ; AVX1-NEXT:    retq
    498 ;
    499 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
    500 ; AVX2:       # BB#0:
    501 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    502 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    503 ; AVX2-NEXT:    retq
    504   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    505   ret <16 x i16> %shuffle
    506 }
    507 
    508 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    509 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    510 ; AVX1:       # BB#0:
    511 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    512 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    513 ; AVX1-NEXT:    retq
    514 ;
    515 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    516 ; AVX2:       # BB#0:
    517 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    518 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    519 ; AVX2-NEXT:    retq
    520   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    521   ret <16 x i16> %shuffle
    522 }
    523 
    524 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    525 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    526 ; AVX1:       # BB#0:
    527 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    528 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    529 ; AVX1-NEXT:    retq
    530 ;
    531 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    532 ; AVX2:       # BB#0:
    533 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    534 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    535 ; AVX2-NEXT:    retq
    536   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    537   ret <16 x i16> %shuffle
    538 }
    539 
    540 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
    541 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    542 ; AVX1:       # BB#0:
    543 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    544 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    545 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    546 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
    547 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    548 ; AVX1-NEXT:    retq
    549 ;
    550 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    551 ; AVX2:       # BB#0:
    552 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    553 ; AVX2-NEXT:    retq
    554   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    555   ret <16 x i16> %shuffle
    556 }
    557 
    558 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
    559 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
    560 ; AVX1:       # BB#0:
    561 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    562 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    563 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    564 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
    565 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    566 ; AVX1-NEXT:    retq
    567 ;
    568 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
    569 ; AVX2:       # BB#0:
    570 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
    571 ; AVX2-NEXT:    retq
    572   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
    573   ret <16 x i16> %shuffle
    574 }
    575 
    576 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
    577 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
    578 ; AVX1:       # BB#0:
    579 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    580 ; AVX1-NEXT:    retq
    581 ;
    582 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
    583 ; AVX2:       # BB#0:
    584 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    585 ; AVX2-NEXT:    retq
    586   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
    587   ret <16 x i16> %shuffle
    588 }
    589 
    590 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    591 ; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
    592 ; AVX1:       # BB#0:
    593 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
    594 ; AVX1-NEXT:    retq
    595 ;
    596 ; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
    597 ; AVX2:       # BB#0:
    598 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    599 ; AVX2-NEXT:    retq
    600   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
    601   ret <16 x i16> %shuffle
    602 }
    603 
    604 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
    605 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
    606 ; AVX1:       # BB#0:
    607 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
    608 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    609 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
    610 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    611 ; AVX1-NEXT:    retq
    612 ;
    613 ; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
    614 ; AVX2:       # BB#0:
    615 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
    616 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    617 ; AVX2-NEXT:    retq
    618   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
    619   ret <16 x i16> %shuffle
    620 }
    621 
    622 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    623 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
    624 ; AVX1:       # BB#0:
    625 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
    626 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    627 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    628 ; AVX1-NEXT:    retq
    629 ;
    630 ; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
    631 ; AVX2:       # BB#0:
    632 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
    633 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    634 ; AVX2-NEXT:    retq
    635   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    636   ret <16 x i16> %shuffle
    637 }
    638 
    639 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
    640 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
    641 ; AVX1:       # BB#0:
    642 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    643 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    644 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    645 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
    646 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    647 ; AVX1-NEXT:    retq
    648 ;
    649 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
    650 ; AVX2:       # BB#0:
    651 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
    652 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    653 ; AVX2-NEXT:    retq
    654   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
    655   ret <16 x i16> %shuffle
    656 }
    657 
    658 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
    659 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
    660 ; AVX1:       # BB#0:
    661 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    662 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    663 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    664 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
    665 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    666 ; AVX1-NEXT:    retq
    667 ;
    668 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
    669 ; AVX2:       # BB#0:
    670 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
    671 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    672 ; AVX2-NEXT:    retq
    673   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    674   ret <16 x i16> %shuffle
    675 }
    676 
    677 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
    678 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
    679 ; AVX1:       # BB#0:
    680 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
    681 ; AVX1-NEXT:    retq
    682 ;
    683 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
    684 ; AVX2:       # BB#0:
    685 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
    686 ; AVX2-NEXT:    retq
    687   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
    688   ret <16 x i16> %shuffle
    689 }
    690 
    691 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
    692 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
    693 ; AVX1:       # BB#0:
    694 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    695 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    696 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    697 ; AVX1-NEXT:    retq
    698 ;
    699 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
    700 ; AVX2:       # BB#0:
    701 ; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
    702 ; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
    703 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    704 ; AVX2-NEXT:    retq
    705   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
    706   ret <16 x i16> %shuffle
    707 }
    708 
    709 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
    710 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
    711 ; AVX1:       # BB#0:
    712 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    713 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
    714 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
    715 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    716 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    717 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    718 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    719 ; AVX1-NEXT:    retq
    720 ;
    721 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
    722 ; AVX2:       # BB#0:
    723 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17]
    724 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
    725 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    726 ; AVX2-NEXT:    retq
    727   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
    728   ret <16 x i16> %shuffle
    729 }
    730 
    731 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    732 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
    733 ; AVX1:       # BB#0:
    734 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    735 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
    736 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    737 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
    738 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
    739 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    740 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    741 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    742 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    743 ; AVX1-NEXT:    retq
    744 ;
    745 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
    746 ; AVX2:       # BB#0:
    747 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
    748 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    749 ; AVX2-NEXT:    retq
    750   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
    751   ret <16 x i16> %shuffle
    752 }
    753 
    754 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
    755 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
    756 ; AVX1:       # BB#0:
    757 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    758 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
    759 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    760 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
    761 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    762 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
    763 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    764 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
    765 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    766 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    767 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    768 ; AVX1-NEXT:    retq
    769 ;
    770 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
    771 ; AVX2:       # BB#0:
    772 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    773 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
    774 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
    775 ; AVX2-NEXT:    retq
    776   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
    777   ret <16 x i16> %shuffle
    778 }
    779 
    780 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
    781 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
    782 ; AVX1:       # BB#0:
    783 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    784 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    785 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
    786 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
    787 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
    788 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    789 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
    790 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    791 ; AVX1-NEXT:    retq
    792 ;
    793 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
    794 ; AVX2:       # BB#0:
    795 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
    796 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
    797 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
    798 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    799 ; AVX2-NEXT:    retq
    800   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
    801   ret <16 x i16> %shuffle
    802 }
    803 
    804 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
    805 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
    806 ; AVX1:       # BB#0:
    807 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    808 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    809 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    810 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    811 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    812 ; AVX1-NEXT:    retq
    813 ;
    814 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
    815 ; AVX2:       # BB#0:
    816 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
    817 ; AVX2-NEXT:    retq
    818   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
    819   ret <16 x i16> %shuffle
    820 }
    821 
    822 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
    823 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
    824 ; AVX1:       # BB#0:
    825 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    826 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    827 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    828 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    829 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    830 ; AVX1-NEXT:    retq
    831 ;
    832 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
    833 ; AVX2:       # BB#0:
    834 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
    835 ; AVX2-NEXT:    retq
    836   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
    837   ret <16 x i16> %shuffle
    838 }
    839 
    840 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    841 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
    842 ; AVX1:       # BB#0:
    843 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    844 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    845 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    846 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    847 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    848 ; AVX1-NEXT:    retq
    849 ;
    850 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
    851 ; AVX2:       # BB#0:
    852 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
    853 ; AVX2-NEXT:    retq
    854   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
    855   ret <16 x i16> %shuffle
    856 }
    857 
    858 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    859 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
    860 ; AVX1:       # BB#0:
    861 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    862 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    863 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    864 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    865 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    866 ; AVX1-NEXT:    retq
    867 ;
    868 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
    869 ; AVX2:       # BB#0:
    870 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
    871 ; AVX2-NEXT:    retq
    872   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
    873   ret <16 x i16> %shuffle
    874 }
    875 
    876 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    877 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
    878 ; AVX1:       # BB#0:
    879 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    880 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    881 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    882 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    883 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    884 ; AVX1-NEXT:    retq
    885 ;
    886 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
    887 ; AVX2:       # BB#0:
    888 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
    889 ; AVX2-NEXT:    retq
    890   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
    891   ret <16 x i16> %shuffle
    892 }
    893 
    894 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    895 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
    896 ; AVX1:       # BB#0:
    897 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    898 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    899 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    900 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    901 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    902 ; AVX1-NEXT:    retq
    903 ;
    904 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
    905 ; AVX2:       # BB#0:
    906 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
    907 ; AVX2-NEXT:    retq
    908   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    909   ret <16 x i16> %shuffle
    910 }
    911 
    912 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    913 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
    914 ; AVX1:       # BB#0:
    915 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    916 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    917 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    918 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    919 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    920 ; AVX1-NEXT:    retq
    921 ;
    922 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
    923 ; AVX2:       # BB#0:
    924 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    925 ; AVX2-NEXT:    retq
    926   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    927   ret <16 x i16> %shuffle
    928 }
    929 
    930 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
    931 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
    932 ; AVX1:       # BB#0:
    933 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    934 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    935 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
    936 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    937 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    938 ; AVX1-NEXT:    retq
    939 ;
    940 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
    941 ; AVX2:       # BB#0:
    942 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
    943 ; AVX2-NEXT:    retq
    944   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
    945   ret <16 x i16> %shuffle
    946 }
    947 
    948 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
    949 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
    950 ; AVX1:       # BB#0:
    951 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    952 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    953 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    954 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    955 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    956 ; AVX1-NEXT:    retq
    957 ;
    958 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
    959 ; AVX2:       # BB#0:
    960 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
    961 ; AVX2-NEXT:    retq
    962   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    963   ret <16 x i16> %shuffle
    964 }
    965 
    966 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
    967 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
    968 ; AVX1:       # BB#0:
    969 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    970 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    971 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    972 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    973 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    974 ; AVX1-NEXT:    retq
    975 ;
    976 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
    977 ; AVX2:       # BB#0:
    978 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31]
    979 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
    980 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    981 ; AVX2-NEXT:    retq
    982   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    983   ret <16 x i16> %shuffle
    984 }
    985 
    986 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
    987 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
    988 ; AVX1:       # BB#0:
    989 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    990 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    991 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
    992 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    993 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    994 ; AVX1-NEXT:    retq
    995 ;
    996 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
    997 ; AVX2:       # BB#0:
    998 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
    999 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
   1000 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   1001 ; AVX2-NEXT:    retq
   1002   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   1003   ret <16 x i16> %shuffle
   1004 }
   1005 
   1006 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1007 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1008 ; AVX1:       # BB#0:
   1009 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
   1010 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1011 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
   1012 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1013 ; AVX1-NEXT:    retq
   1014 ;
   1015 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1016 ; AVX2:       # BB#0:
   1017 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
   1018 ; AVX2-NEXT:    retq
   1019   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1020   ret <16 x i16> %shuffle
   1021 }
   1022 
   1023 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1024 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1025 ; AVX1:       # BB#0:
   1026 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
   1027 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1028 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
   1029 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1030 ; AVX1-NEXT:    retq
   1031 ;
   1032 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1033 ; AVX2:       # BB#0:
   1034 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
   1035 ; AVX2-NEXT:    retq
   1036   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
   1037   ret <16 x i16> %shuffle
   1038 }
   1039 
   1040 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1041 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1042 ; AVX1:       # BB#0:
   1043 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
   1044 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1045 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
   1046 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1047 ; AVX1-NEXT:    retq
   1048 ;
   1049 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1050 ; AVX2:       # BB#0:
   1051 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
   1052 ; AVX2-NEXT:    retq
   1053   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
   1054   ret <16 x i16> %shuffle
   1055 }
   1056 
   1057 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1058 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1059 ; AVX1:       # BB#0:
   1060 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
   1061 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1062 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
   1063 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1064 ; AVX1-NEXT:    retq
   1065 ;
   1066 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1067 ; AVX2:       # BB#0:
   1068 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
   1069 ; AVX2-NEXT:    retq
   1070   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
   1071   ret <16 x i16> %shuffle
   1072 }
   1073 
   1074 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
   1075 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1076 ; AVX1:       # BB#0:
   1077 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
   1078 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1079 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
   1080 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1081 ; AVX1-NEXT:    retq
   1082 ;
   1083 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1084 ; AVX2:       # BB#0:
   1085 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
   1086 ; AVX2-NEXT:    retq
   1087   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
   1088   ret <16 x i16> %shuffle
   1089 }
   1090 
   1091 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
   1092 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1093 ; AVX1:       # BB#0:
   1094 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
   1095 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1096 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
   1097 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1098 ; AVX1-NEXT:    retq
   1099 ;
   1100 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1101 ; AVX2:       # BB#0:
   1102 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
   1103 ; AVX2-NEXT:    retq
   1104   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
   1105   ret <16 x i16> %shuffle
   1106 }
   1107 
   1108 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
   1109 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1110 ; AVX1:       # BB#0:
   1111 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1112 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1113 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
   1114 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1115 ; AVX1-NEXT:    retq
   1116 ;
   1117 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1118 ; AVX2:       # BB#0:
   1119 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
   1120 ; AVX2-NEXT:    retq
   1121   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
   1122   ret <16 x i16> %shuffle
   1123 }
   1124 
   1125 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
   1126 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1127 ; AVX1:       # BB#0:
   1128 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
   1129 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1130 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1131 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
   1132 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1133 ; AVX1-NEXT:    retq
   1134 ;
   1135 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1136 ; AVX2:       # BB#0:
   1137 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
   1138 ; AVX2-NEXT:    retq
   1139   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
   1140   ret <16 x i16> %shuffle
   1141 }
   1142 
   1143 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1144 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1145 ; AVX1:       # BB#0:
   1146 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1147 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1148 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1149 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1150 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1151 ; AVX1-NEXT:    retq
   1152 ;
   1153 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1154 ; AVX2:       # BB#0:
   1155 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1156 ; AVX2-NEXT:    retq
   1157   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   1158   ret <16 x i16> %shuffle
   1159 }
   1160 
   1161 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
   1162 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1163 ; AVX1:       # BB#0:
   1164 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
   1165 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1166 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
   1167 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1168 ; AVX1-NEXT:    retq
   1169 ;
   1170 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1171 ; AVX2:       # BB#0:
   1172 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
   1173 ; AVX2-NEXT:    retq
   1174   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
   1175   ret <16 x i16> %shuffle
   1176 }
   1177 
   1178 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
   1179 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1180 ; AVX1:       # BB#0:
   1181 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
   1182 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1183 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
   1184 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1185 ; AVX1-NEXT:    retq
   1186 ;
   1187 ; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1188 ; AVX2:       # BB#0:
   1189 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
   1190 ; AVX2-NEXT:    retq
   1191   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
   1192   ret <16 x i16> %shuffle
   1193 }
   1194 
   1195 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
   1196 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1197 ; AVX1:       # BB#0:
   1198 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
   1199 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1200 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1201 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
   1202 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1203 ; AVX1-NEXT:    retq
   1204 ;
   1205 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1206 ; AVX2:       # BB#0:
   1207 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
   1208 ; AVX2-NEXT:    retq
   1209   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
   1210   ret <16 x i16> %shuffle
   1211 }
   1212 
   1213 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1214 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1215 ; AVX1:       # BB#0:
   1216 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3]
   1217 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1218 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1219 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
   1220 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1221 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1222 ; AVX1-NEXT:    retq
   1223 ;
   1224 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1225 ; AVX2:       # BB#0:
   1226 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
   1227 ; AVX2-NEXT:    retq
   1228   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
   1229   ret <16 x i16> %shuffle
   1230 }
   1231 
   1232 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
   1233 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1234 ; AVX1:       # BB#0:
   1235 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1236 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1237 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1238 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1239 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1240 ; AVX1-NEXT:    retq
   1241 ;
   1242 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1243 ; AVX2:       # BB#0:
   1244 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1245 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1246 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1247 ; AVX2-NEXT:    retq
   1248   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1249   ret <16 x i16> %shuffle
   1250 }
   1251 
   1252 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
   1253 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1254 ; AVX1:       # BB#0:
   1255 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1256 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1257 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1258 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1259 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1260 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1261 ; AVX1-NEXT:    retq
   1262 ;
   1263 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1264 ; AVX2:       # BB#0:
   1265 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
   1266 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1267 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1268 ; AVX2-NEXT:    retq
   1269   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1270   ret <16 x i16> %shuffle
   1271 }
   1272 
   1273 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
   1274 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1275 ; AVX1:       # BB#0:
   1276 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1277 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1278 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1279 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1280 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1281 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1282 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1283 ; AVX1-NEXT:    retq
   1284 ;
   1285 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1286 ; AVX2:       # BB#0:
   1287 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1288 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1289 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1290 ; AVX2-NEXT:    retq
   1291   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1292   ret <16 x i16> %shuffle
   1293 }
   1294 
   1295 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
   1296 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1297 ; AVX1:       # BB#0:
   1298 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1299 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1300 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1301 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1302 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1303 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1304 ; AVX1-NEXT:    retq
   1305 ;
   1306 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1307 ; AVX2:       # BB#0:
   1308 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1309 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1310 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1311 ; AVX2-NEXT:    retq
   1312   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1313   ret <16 x i16> %shuffle
   1314 }
   1315 
   1316 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
   1317 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1318 ; AVX1:       # BB#0:
   1319 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1320 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1321 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1322 ; AVX1-NEXT:    retq
   1323 ;
   1324 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1325 ; AVX2:       # BB#0:
   1326 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1327 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1328 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1329 ; AVX2-NEXT:    retq
   1330   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1331   ret <16 x i16> %shuffle
   1332 }
   1333 
   1334 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
   1335 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1336 ; AVX1:       # BB#0:
   1337 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1338 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1339 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1340 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1341 ; AVX1-NEXT:    retq
   1342 ;
   1343 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1344 ; AVX2:       # BB#0:
   1345 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
   1346 ; AVX2-NEXT:    retq
   1347   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
   1348   ret <16 x i16> %shuffle
   1349 }
   1350 
   1351 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
   1352 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1353 ; AVX1:       # BB#0:
   1354 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1355 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1356 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1357 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1358 ; AVX1-NEXT:    retq
   1359 ;
   1360 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1361 ; AVX2:       # BB#0:
   1362 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
   1363 ; AVX2-NEXT:    retq
   1364   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
   1365   ret <16 x i16> %shuffle
   1366 }
   1367 
   1368 ;
   1369 ; Shuffle to logical bit shifts
   1370 ;
   1371 
   1372 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
   1373 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1374 ; AVX1:       # BB#0:
   1375 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm1
   1376 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1377 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm0
   1378 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1379 ; AVX1-NEXT:    retq
   1380 ;
   1381 ; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1382 ; AVX2:       # BB#0:
   1383 ; AVX2-NEXT:    vpslld $16, %ymm0, %ymm0
   1384 ; AVX2-NEXT:    retq
   1385   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
   1386   ret <16 x i16> %shuffle
   1387 }
   1388 
   1389 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
   1390 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1391 ; AVX1:       # BB#0:
   1392 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm1
   1393 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1394 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
   1395 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1396 ; AVX1-NEXT:    retq
   1397 ;
   1398 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1399 ; AVX2:       # BB#0:
   1400 ; AVX2-NEXT:    vpsllq $48, %ymm0, %ymm0
   1401 ; AVX2-NEXT:    retq
   1402   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
   1403   ret <16 x i16> %shuffle
   1404 }
   1405 
   1406 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
   1407 ; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1408 ; AVX1:       # BB#0:
   1409 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
   1410 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1411 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
   1412 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1413 ; AVX1-NEXT:    retq
   1414 ;
   1415 ; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1416 ; AVX2:       # BB#0:
   1417 ; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
   1418 ; AVX2-NEXT:    retq
   1419   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
   1420   ret <16 x i16> %shuffle
   1421 }
   1422 
   1423 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
   1424 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1425 ; AVX1:       # BB#0:
   1426 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
   1427 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   1428 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1429 ; AVX1-NEXT:    retq
   1430 ;
   1431 ; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1432 ; AVX2:       # BB#0:
   1433 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
   1434 ; AVX2-NEXT:    retq
   1435   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
   1436   ret <16 x i16> %shuffle
   1437 }
   1438 
   1439 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
   1440 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1441 ; AVX1:       # BB#0:
   1442 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7]
   1443 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1444 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
   1445 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1446 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1447 ; AVX1-NEXT:    retq
   1448 ;
   1449 ; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1450 ; AVX2:       # BB#0:
   1451 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1452 ; AVX2-NEXT:    retq
   1453   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
   1454   ret <16 x i16> %shuffle
   1455 }
   1456 
   1457 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
   1458 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
   1459 ; AVX1:       # BB#0:
   1460 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1461 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1462 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1463 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1464 ; AVX1-NEXT:    retq
   1465 ;
   1466 ; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
   1467 ; AVX2:       # BB#0:
   1468 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1469 ; AVX2-NEXT:    retq
   1470   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
   1471   ret <16 x i16> %shuffle
   1472 }
   1473 
   1474 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
   1475 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   1476 ; AVX1:       # BB#0:
   1477 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1478 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1479 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1480 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1481 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1482 ; AVX1-NEXT:    retq
   1483 ;
   1484 ; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   1485 ; AVX2:       # BB#0:
   1486 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1487 ; AVX2-NEXT:    retq
   1488   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
   1489   ret <16 x i16> %shuffle
   1490 }
   1491 
   1492 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
   1493 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   1494 ; AVX1:       # BB#0:
   1495 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1496 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1497 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   1498 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
   1499 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1500 ; AVX1-NEXT:    retq
   1501 ;
   1502 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   1503 ; AVX2:       # BB#0:
   1504 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
   1505 ; AVX2-NEXT:    retq
   1506   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
   1507   ret <16 x i16> %shuffle
   1508 }
   1509 
   1510 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
   1511 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
   1512 ; AVX1:       # BB#0:
   1513 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1514 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1515 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   1516 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
   1517 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1518 ; AVX1-NEXT:    retq
   1519 ;
   1520 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
   1521 ; AVX2:       # BB#0:
   1522 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
   1523 ; AVX2-NEXT:    retq
   1524   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
   1525   ret <16 x i16> %shuffle
   1526 }
   1527 
   1528 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
   1529 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   1530 ; AVX1:       # BB#0:
   1531 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1532 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1533 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1534 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1535 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1536 ; AVX1-NEXT:    retq
   1537 ;
   1538 ; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   1539 ; AVX2:       # BB#0:
   1540 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1541 ; AVX2-NEXT:    retq
   1542   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
   1543   ret <16 x i16> %shuffle
   1544 }
   1545 
   1546 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
   1547 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   1548 ; AVX1:       # BB#0:
   1549 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   1550 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   1551 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1552 ; AVX1-NEXT:    retq
   1553 ;
   1554 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   1555 ; AVX2:       # BB#0:
   1556 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1557 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
   1558 ; AVX2-NEXT:    retq
   1559   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
   1560   ret <16 x i16> %shuffle
   1561 }
   1562 
   1563 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
   1564 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   1565 ; AVX1:       # BB#0:
   1566 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1567 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1568 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1569 ; AVX1-NEXT:    retq
   1570 ;
   1571 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   1572 ; AVX2:       # BB#0:
   1573 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1574 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1575 ; AVX2-NEXT:    retq
   1576   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
   1577   ret <16 x i16> %shuffle
   1578 }
   1579 
   1580 define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
   1581 ; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   1582 ; AVX1:       # BB#0:
   1583 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1584 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1585 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
   1586 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
   1587 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1588 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1589 ; AVX1-NEXT:    retq
   1590 ;
   1591 ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   1592 ; AVX2:       # BB#0:
   1593 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1594 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1595 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
   1596 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
   1597 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1598 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1599 ; AVX2-NEXT:    retq
   1600   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
   1601   ret <16 x i16> %shuffle
   1602 }
   1603 
   1604 define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
   1605 ; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   1606 ; AVX1:       # BB#0:
   1607 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1608 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   1609 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1610 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
   1611 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1612 ; AVX1-NEXT:    retq
   1613 ;
   1614 ; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   1615 ; AVX2:       # BB#0:
   1616 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1617 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   1618 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1619 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
   1620 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1621 ; AVX2-NEXT:    retq
   1622   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
   1623   ret <16 x i16> %shuffle
   1624 }
   1625 
   1626 define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
   1627 ; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   1628 ; AVX1:       # BB#0:
   1629 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1630 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1631 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
   1632 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   1633 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
   1634 ; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
   1635 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1636 ; AVX1-NEXT:    retq
   1637 ;
   1638 ; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   1639 ; AVX2:       # BB#0:
   1640 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1641 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1642 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   1643 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1644 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1645 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1646 ; AVX2-NEXT:    retq
   1647   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
   1648   ret <16 x i16> %shuffle
   1649 }
   1650 
   1651 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1652 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   1653 ; AVX1:       # BB#0:
   1654 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1655 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1656 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
   1657 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1658 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1659 ; AVX1-NEXT:    retq
   1660 ;
   1661 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   1662 ; AVX2:       # BB#0:
   1663 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1664 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1665 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
   1666 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   1667 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1668 ; AVX2-NEXT:    retq
   1669   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1670   ret <16 x i16> %shuffle
   1671 }
   1672 
   1673 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1674 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   1675 ; AVX1:       # BB#0:
   1676 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1677 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1678 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1679 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
   1680 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1681 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1682 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1683 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1684 ; AVX1-NEXT:    retq
   1685 ;
   1686 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   1687 ; AVX2:       # BB#0:
   1688 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1689 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   1690 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1691 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
   1692 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1693 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1694 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1695 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1696 ; AVX2-NEXT:    retq
   1697   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   1698   ret <16 x i16> %shuffle
   1699 }
   1700 
   1701 define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
   1702 ; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   1703 ; AVX1:       # BB#0:
   1704 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1705 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1706 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1707 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1708 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
   1709 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1710 ; AVX1-NEXT:    retq
   1711 ;
   1712 ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   1713 ; AVX2:       # BB#0:
   1714 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1715 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1716 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1717 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1718 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
   1719 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1720 ; AVX2-NEXT:    retq
   1721   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
   1722   ret <16 x i16> %shuffle
   1723 }
   1724 
   1725 define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
   1726 ; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   1727 ; AVX1:       # BB#0:
   1728 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1729 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1730 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1731 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1732 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
   1733 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1734 ; AVX1-NEXT:    retq
   1735 ;
   1736 ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   1737 ; AVX2:       # BB#0:
   1738 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1739 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1740 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1741 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1742 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
   1743 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1744 ; AVX2-NEXT:    retq
   1745   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
   1746   ret <16 x i16> %shuffle
   1747 }
   1748 
   1749 define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   1750 ; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   1751 ; AVX1:       # BB#0:
   1752 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1753 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1754 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   1755 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1756 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
   1757 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1758 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1759 ; AVX1-NEXT:    retq
   1760 ;
   1761 ; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   1762 ; AVX2:       # BB#0:
   1763 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1764 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1765 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   1766 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1767 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
   1768 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1769 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1770 ; AVX2-NEXT:    retq
   1771   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
   1772   ret <16 x i16> %shuffle
   1773 }
   1774 
   1775 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1776 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   1777 ; AVX1:       # BB#0:
   1778 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1779 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1780 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
   1781 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1782 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1783 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1784 ; AVX1-NEXT:    retq
   1785 ;
   1786 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   1787 ; AVX2:       # BB#0:
   1788 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1789 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1790 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
   1791 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1792 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1793 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1794 ; AVX2-NEXT:    retq
   1795   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
   1796   ret <16 x i16> %shuffle
   1797 }
   1798 
   1799 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   1800 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   1801 ; AVX1:       # BB#0:
   1802 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1803 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1804 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1805 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
   1806 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1807 ; AVX1-NEXT:    retq
   1808 ;
   1809 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   1810 ; AVX2:       # BB#0:
   1811 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1812 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1813 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1814 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
   1815 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1816 ; AVX2-NEXT:    retq
   1817   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
   1818   ret <16 x i16> %shuffle
   1819 }
   1820 
   1821 define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   1822 ; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   1823 ; AVX1:       # BB#0:
   1824 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1825 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1826 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
   1827 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1828 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
   1829 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1830 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1831 ; AVX1-NEXT:    retq
   1832 ;
   1833 ; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   1834 ; AVX2:       # BB#0:
   1835 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1836 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1837 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
   1838 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1839 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
   1840 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1841 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1842 ; AVX2-NEXT:    retq
   1843   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
   1844   ret <16 x i16> %shuffle
   1845 }
   1846 
   1847 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
   1848 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   1849 ; AVX1:       # BB#0:
   1850 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1851 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1852 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1853 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
   1854 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   1855 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1856 ; AVX1-NEXT:    retq
   1857 ;
   1858 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   1859 ; AVX2:       # BB#0:
   1860 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1861 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1862 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1863 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
   1864 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   1865 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1866 ; AVX2-NEXT:    retq
   1867   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
   1868   ret <16 x i16> %shuffle
   1869 }
   1870 
   1871 define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
   1872 ; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   1873 ; AVX1:       # BB#0:
   1874 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1875 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
   1876 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   1877 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
   1878 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1879 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   1880 ; AVX1-NEXT:    retq
   1881 ;
   1882 ; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   1883 ; AVX2:       # BB#0:
   1884 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1885 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
   1886 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   1887 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
   1888 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1889 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   1890 ; AVX2-NEXT:    retq
   1891   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
   1892   ret <16 x i16> %shuffle
   1893 }
   1894 
   1895 define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
   1896 ; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   1897 ; AVX1:       # BB#0:
   1898 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1899 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1900 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
   1901 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1902 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
   1903 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1904 ; AVX1-NEXT:    retq
   1905 ;
   1906 ; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   1907 ; AVX2:       # BB#0:
   1908 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1909 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1910 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
   1911 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1912 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
   1913 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1914 ; AVX2-NEXT:    retq
   1915   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
   1916   ret <16 x i16> %shuffle
   1917 }
   1918 
   1919 define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
   1920 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   1921 ; AVX1:       # BB#0:
   1922 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1923 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1924 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
   1925 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1926 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
   1927 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1928 ; AVX1-NEXT:    retq
   1929 ;
   1930 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   1931 ; AVX2:       # BB#0:
   1932 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1933 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1934 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
   1935 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1936 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
   1937 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1938 ; AVX2-NEXT:    retq
   1939   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
   1940   ret <16 x i16> %shuffle
   1941 }
   1942 
   1943 define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
   1944 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   1945 ; AVX1:       # BB#0:
   1946 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1947 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1948 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
   1949 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1950 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
   1951 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1952 ; AVX1-NEXT:    retq
   1953 ;
   1954 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   1955 ; AVX2:       # BB#0:
   1956 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1957 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   1958 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
   1959 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1960 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
   1961 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1962 ; AVX2-NEXT:    retq
   1963   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
   1964   ret <16 x i16> %shuffle
   1965 }
   1966 
   1967 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
   1968 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   1969 ; AVX1:       # BB#0:
   1970 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1971 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1972 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
   1973 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1974 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
   1975 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1976 ; AVX1-NEXT:    retq
   1977 ;
   1978 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   1979 ; AVX2:       # BB#0:
   1980 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1981 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1982 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
   1983 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1984 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
   1985 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1986 ; AVX2-NEXT:    retq
   1987   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
   1988   ret <16 x i16> %shuffle
   1989 }
   1990 
   1991 define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
   1992 ; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   1993 ; AVX1:       # BB#0:
   1994 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1995 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1996 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
   1997 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1998 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
   1999 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2000 ; AVX1-NEXT:    retq
   2001 ;
   2002 ; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   2003 ; AVX2:       # BB#0:
   2004 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2005 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2006 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
   2007 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2008 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
   2009 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2010 ; AVX2-NEXT:    retq
   2011   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
   2012   ret <16 x i16> %shuffle
   2013 }
   2014 
   2015 define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2016 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2017 ; AVX1:       # BB#0:
   2018 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2019 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
   2020 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2021 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2022 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2023 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2024 ; AVX1-NEXT:    retq
   2025 ;
   2026 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2027 ; AVX2:       # BB#0:
   2028 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2029 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
   2030 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2031 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2032 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2033 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2034 ; AVX2-NEXT:    retq
   2035   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
   2036   ret <16 x i16> %shuffle
   2037 }
   2038 
   2039 define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2040 ; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2041 ; AVX1:       # BB#0:
   2042 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2043 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
   2044 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2045 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2046 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2047 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2048 ; AVX1-NEXT:    retq
   2049 ;
   2050 ; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2051 ; AVX2:       # BB#0:
   2052 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2053 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
   2054 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2055 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2056 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2057 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2058 ; AVX2-NEXT:    retq
   2059   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
   2060   ret <16 x i16> %shuffle
   2061 }
   2062 
   2063 define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
   2064 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2065 ; AVX1:       # BB#0:
   2066 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2067 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
   2068 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2069 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2070 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2071 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2072 ; AVX1-NEXT:    retq
   2073 ;
   2074 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2075 ; AVX2:       # BB#0:
   2076 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2077 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
   2078 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2079 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2080 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2081 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2082 ; AVX2-NEXT:    retq
   2083   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
   2084   ret <16 x i16> %shuffle
   2085 }
   2086 
   2087 define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
   2088 ; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2089 ; AVX1:       # BB#0:
   2090 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2091 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
   2092 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2093 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
   2094 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2095 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2096 ; AVX1-NEXT:    retq
   2097 ;
   2098 ; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2099 ; AVX2:       # BB#0:
   2100 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2101 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
   2102 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2103 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
   2104 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2105 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2106 ; AVX2-NEXT:    retq
   2107   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
   2108   ret <16 x i16> %shuffle
   2109 }
   2110 
   2111 define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2112 ; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2113 ; AVX1:       # BB#0:
   2114 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2115 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2116 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
   2117 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2118 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
   2119 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2120 ; AVX1-NEXT:    retq
   2121 ;
   2122 ; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2123 ; AVX2:       # BB#0:
   2124 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2125 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2126 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
   2127 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2128 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
   2129 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2130 ; AVX2-NEXT:    retq
   2131   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2132   ret <16 x i16> %shuffle
   2133 }
   2134 
   2135 define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2136 ; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2137 ; AVX1:       # BB#0:
   2138 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2139 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2140 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
   2141 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2142 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
   2143 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2144 ; AVX1-NEXT:    retq
   2145 ;
   2146 ; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2147 ; AVX2:       # BB#0:
   2148 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2149 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2150 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
   2151 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2152 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
   2153 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2154 ; AVX2-NEXT:    retq
   2155   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   2156   ret <16 x i16> %shuffle
   2157 }
   2158 
   2159 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2160 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2161 ; AVX1:       # BB#0:
   2162 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2163 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2164 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2165 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2166 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2167 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2168 ; AVX1-NEXT:    retq
   2169 ;
   2170 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2171 ; AVX2:       # BB#0:
   2172 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2173 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2174 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2175 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2176 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2177 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2178 ; AVX2-NEXT:    retq
   2179   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   2180   ret <16 x i16> %shuffle
   2181 }
   2182 
   2183 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   2184 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2185 ; AVX1:       # BB#0:
   2186 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2187 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2188 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
   2189 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2190 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
   2191 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2192 ; AVX1-NEXT:    retq
   2193 ;
   2194 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2195 ; AVX2:       # BB#0:
   2196 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2197 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   2198 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
   2199 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2200 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
   2201 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2202 ; AVX2-NEXT:    retq
   2203   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
   2204   ret <16 x i16> %shuffle
   2205 }
   2206 
   2207 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
   2208 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2209 ; AVX1:       # BB#0:
   2210 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2211 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
   2212 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
   2213 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2214 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
   2215 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
   2216 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2217 ; AVX1-NEXT:    retq
   2218 ;
   2219 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2220 ; AVX2:       # BB#0:
   2221 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2222 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
   2223 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
   2224 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2225 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
   2226 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
   2227 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2228 ; AVX2-NEXT:    retq
   2229   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
   2230   ret <16 x i16> %shuffle
   2231 }
   2232 
   2233 define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2234 ; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2235 ; AVX1:       # BB#0:
   2236 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2237 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2238 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
   2239 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2240 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
   2241 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2242 ; AVX1-NEXT:    retq
   2243 ;
   2244 ; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2245 ; AVX2:       # BB#0:
   2246 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2247 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2248 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
   2249 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2250 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
   2251 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2252 ; AVX2-NEXT:    retq
   2253   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2254   ret <16 x i16> %shuffle
   2255 }
   2256 
   2257 define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2258 ; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2259 ; AVX1:       # BB#0:
   2260 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2261 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2262 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2263 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2264 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2265 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2266 ; AVX1-NEXT:    retq
   2267 ;
   2268 ; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2269 ; AVX2:       # BB#0:
   2270 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2271 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2272 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2273 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2274 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2275 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2276 ; AVX2-NEXT:    retq
   2277   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
   2278   ret <16 x i16> %shuffle
   2279 }
   2280 
   2281 define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2282 ; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   2283 ; AVX1:       # BB#0:
   2284 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2285 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2286 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2287 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2288 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2289 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2290 ; AVX1-NEXT:    retq
   2291 ;
   2292 ; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   2293 ; AVX2:       # BB#0:
   2294 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2295 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2296 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2297 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2298 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2299 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2300 ; AVX2-NEXT:    retq
   2301   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   2302   ret <16 x i16> %shuffle
   2303 }
   2304 
   2305 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2306 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   2307 ; AVX1:       # BB#0:
   2308 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2309 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
   2310 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2311 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2312 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2313 ; AVX1-NEXT:    retq
   2314 ;
   2315 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   2316 ; AVX2:       # BB#0:
   2317 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
   2318 ; AVX2-NEXT:    retq
   2319   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
   2320   ret <16 x i16> %shuffle
   2321 }
   2322 
   2323 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   2324 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   2325 ; AVX1:       # BB#0:
   2326 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2327 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
   2328 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2329 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
   2330 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2331 ; AVX1-NEXT:    retq
   2332 ;
   2333 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   2334 ; AVX2:       # BB#0:
   2335 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2336 ; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm2
   2337 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2338 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
   2339 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2340 ; AVX2-NEXT:    retq
   2341   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
   2342   ret <16 x i16> %shuffle
   2343 }
   2344 
   2345 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2346 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   2347 ; AVX1:       # BB#0:
   2348 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2349 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
   2350 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2351 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2352 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2353 ; AVX1-NEXT:    retq
   2354 ;
   2355 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   2356 ; AVX2:       # BB#0:
   2357 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
   2358 ; AVX2-NEXT:    retq
   2359   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   2360   ret <16 x i16> %shuffle
   2361 }
   2362 
   2363 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   2364 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   2365 ; AVX1:       # BB#0:
   2366 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2367 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
   2368 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2369 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2370 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2371 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2372 ; AVX1-NEXT:    retq
   2373 ;
   2374 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   2375 ; AVX2:       # BB#0:
   2376 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2377 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
   2378 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2379 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2380 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2381 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2382 ; AVX2-NEXT:    retq
   2383   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
   2384   ret <16 x i16> %shuffle
   2385 }
   2386 
   2387 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
   2388 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   2389 ; AVX1:       # BB#0:
   2390 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2391 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
   2392 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2393 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
   2394 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2395 ; AVX1-NEXT:    retq
   2396 ;
   2397 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   2398 ; AVX2:       # BB#0:
   2399 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2400 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
   2401 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2402 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
   2403 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2404 ; AVX2-NEXT:    retq
   2405   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
   2406   ret <16 x i16> %shuffle
   2407 }
   2408 
   2409 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
   2410 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   2411 ; AVX1:       # BB#0:
   2412 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2413 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
   2414 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2415 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
   2416 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2417 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2418 ; AVX1-NEXT:    retq
   2419 ;
   2420 ; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   2421 ; AVX2:       # BB#0:
   2422 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2423 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
   2424 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2425 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
   2426 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2427 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2428 ; AVX2-NEXT:    retq
   2429   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
   2430   ret <16 x i16> %shuffle
   2431 }
   2432 
   2433 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
   2434 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   2435 ; AVX1:       # BB#0:
   2436 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2437 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2438 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2439 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2440 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2441 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2442 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2443 ; AVX1-NEXT:    retq
   2444 ;
   2445 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   2446 ; AVX2:       # BB#0:
   2447 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
   2448 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2449 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2450 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
   2451 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
   2452 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
   2453 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2454 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2455 ; AVX2-NEXT:    retq
   2456   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   2457   ret <16 x i16> %shuffle
   2458 }
   2459 
   2460 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
   2461 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   2462 ; AVX1:       # BB#0:
   2463 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2464 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2465 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
   2466 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
   2467 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
   2468 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2469 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2470 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2471 ; AVX1-NEXT:    retq
   2472 ;
   2473 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   2474 ; AVX2:       # BB#0:
   2475 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2476 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
   2477 ; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm2
   2478 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2479 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
   2480 ; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   2481 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
   2482 ; AVX2-NEXT:    retq
   2483   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
   2484   ret <16 x i16> %shuffle
   2485 }
   2486 
   2487 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   2488 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   2489 ; AVX1:       # BB#0:
   2490 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2491 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2492 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2493 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2494 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2495 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2496 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2497 ; AVX1-NEXT:    retq
   2498 ;
   2499 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   2500 ; AVX2:       # BB#0:
   2501 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
   2502 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2503 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2504 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
   2505 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
   2506 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
   2507 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2508 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2509 ; AVX2-NEXT:    retq
   2510   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   2511   ret <16 x i16> %shuffle
   2512 }
   2513 
   2514 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
   2515 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   2516 ; AVX1:       # BB#0:
   2517 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2518 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2519 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   2520 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2521 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2522 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2523 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2524 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2525 ; AVX1-NEXT:    retq
   2526 ;
   2527 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   2528 ; AVX2:       # BB#0:
   2529 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   2530 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2531 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2532 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
   2533 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2534 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2535 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2536 ; AVX2-NEXT:    retq
   2537   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
   2538   ret <16 x i16> %shuffle
   2539 }
   2540 
   2541 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   2542 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   2543 ; AVX1:       # BB#0:
   2544 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2545 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
   2546 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2547 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   2548 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   2549 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
   2550 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
   2551 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
   2552 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
   2553 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   2554 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2555 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2556 ; AVX1-NEXT:    retq
   2557 ;
   2558 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   2559 ; AVX2:       # BB#0:
   2560 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2561 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
   2562 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
   2563 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
   2564 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
   2565 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
   2566 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2567 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   2568 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   2569 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2570 ; AVX2-NEXT:    retq
   2571   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
   2572   ret <16 x i16> %shuffle
   2573 }
   2574 
   2575 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
   2576 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   2577 ; AVX1:       # BB#0:
   2578 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2579 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
   2580 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2581 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   2582 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   2583 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
   2584 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
   2585 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   2586 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2587 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2588 ; AVX1-NEXT:    retq
   2589 ;
   2590 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   2591 ; AVX2:       # BB#0:
   2592 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2593 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3]
   2594 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
   2595 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
   2596 ; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
   2597 ; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm1
   2598 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   2599 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   2600 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2601 ; AVX2-NEXT:    retq
   2602   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
   2603   ret <16 x i16> %shuffle
   2604 }
   2605 
   2606 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
   2607 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   2608 ; AVX1:       # BB#0:
   2609 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2610 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2611 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
   2612 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
   2613 ; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
   2614 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2615 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2616 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   2617 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   2618 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   2619 ; AVX1-NEXT:    retq
   2620 ;
   2621 ; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   2622 ; AVX2:       # BB#0:
   2623 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2624 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2625 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
   2626 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
   2627 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
   2628 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2629 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
   2630 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   2631 ; AVX2-NEXT:    retq
   2632   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
   2633   ret <16 x i16> %shuffle
   2634 }
   2635 
   2636 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
   2637 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   2638 ; AVX1:       # BB#0:
   2639 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2640 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2641 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2642 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2643 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2644 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   2645 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2646 ; AVX1-NEXT:    retq
   2647 ;
   2648 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   2649 ; AVX2:       # BB#0:
   2650 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
   2651 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
   2652 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2653 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2654 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2655 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
   2656 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   2657 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
   2658 ; AVX2-NEXT:    retq
   2659   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
   2660   ret <16 x i16> %shuffle
   2661 }
   2662 
   2663 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
   2664 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   2665 ; AVX1:       # BB#0:
   2666 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2667 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2668 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2669 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2670 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2671 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   2672 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2673 ; AVX1-NEXT:    retq
   2674 ;
   2675 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   2676 ; AVX2:       # BB#0:
   2677 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
   2678 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
   2679 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2680 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2681 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2682 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
   2683 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   2684 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
   2685 ; AVX2-NEXT:    retq
   2686   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
   2687   ret <16 x i16> %shuffle
   2688 }
   2689 
   2690 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
   2691 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   2692 ; AVX1:       # BB#0:
   2693 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2694 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
   2695 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2696 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
   2697 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
   2698 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
   2699 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
   2700 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
   2701 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   2702 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   2703 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2704 ; AVX1-NEXT:    retq
   2705 ;
   2706 ; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   2707 ; AVX2:       # BB#0:
   2708 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2709 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2710 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   2711 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
   2712 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2713 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
   2714 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
   2715 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2716 ; AVX2-NEXT:    retq
   2717   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
   2718   ret <16 x i16> %shuffle
   2719 }
   2720 
   2721 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2722 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   2723 ; AVX1:       # BB#0:
   2724 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   2725 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
   2726 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
   2727 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2728 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2729 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   2730 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   2731 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   2732 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   2733 ; AVX1-NEXT:    retq
   2734 ;
   2735 ; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   2736 ; AVX2:       # BB#0:
   2737 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
   2738 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
   2739 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
   2740 ; AVX2-NEXT:    retq
   2741   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
   2742   ret <16 x i16> %shuffle
   2743 }
   2744 
   2745 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2746 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   2747 ; AVX1:       # BB#0:
   2748 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2749 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2750 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
   2751 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   2752 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   2753 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   2754 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
   2755 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2756 ; AVX1-NEXT:    retq
   2757 ;
   2758 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   2759 ; AVX2:       # BB#0:
   2760 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2761 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
   2762 ; AVX2-NEXT:    retq
   2763   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2764   ret <16 x i16> %shuffle
   2765 }
   2766 
   2767 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2768 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
   2769 ; AVX1:       # BB#0:
   2770 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
   2771 ; AVX1-NEXT:    retq
   2772 ;
   2773 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
   2774 ; AVX2:       # BB#0:
   2775 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
   2776 ; AVX2-NEXT:    retq
   2777   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2778   ret <16 x i16> %shuffle
   2779 }
   2780 
   2781 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2782 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   2783 ; AVX1:       # BB#0:
   2784 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2785 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2786 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
   2787 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
   2788 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   2789 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2790 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2791 ; AVX1-NEXT:    retq
   2792 ;
   2793 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   2794 ; AVX2:       # BB#0:
   2795 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
   2796 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
   2797 ; AVX2-NEXT:    retq
   2798   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2799   ret <16 x i16> %shuffle
   2800 }
   2801 
   2802 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
   2803 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   2804 ; AVX1:       # BB#0:
   2805 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2806 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2807 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2808 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
   2809 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   2810 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2811 ; AVX1-NEXT:    retq
   2812 ;
   2813 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   2814 ; AVX2:       # BB#0:
   2815 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
   2816 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
   2817 ; AVX2-NEXT:    retq
   2818   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
   2819   ret <16 x i16> %shuffle
   2820 }
   2821 
   2822 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2823 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   2824 ; AVX1:       # BB#0:
   2825 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2826 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2827 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   2828 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
   2829 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   2830 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   2831 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2832 ; AVX1-NEXT:    retq
   2833 ;
   2834 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   2835 ; AVX2:       # BB#0:
   2836 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
   2837 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
   2838 ; AVX2-NEXT:    retq
   2839   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   2840   ret <16 x i16> %shuffle
   2841 }
   2842 
   2843 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
   2844 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   2845 ; AVX1:       # BB#0:
   2846 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2847 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
   2848 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
   2849 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
   2850 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2851 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
   2852 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   2853 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2854 ; AVX1-NEXT:    retq
   2855 ;
   2856 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   2857 ; AVX2:       # BB#0:
   2858 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2859 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2860 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2861 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   2862 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2863 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2864 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2865 ; AVX2-NEXT:    retq
   2866   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
   2867   ret <16 x i16> %shuffle
   2868 }
   2869 
   2870 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
   2871 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   2872 ; AVX1:       # BB#0:
   2873 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2874 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2875 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
   2876 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   2877 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   2878 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2879 ; AVX1-NEXT:    retq
   2880 ;
   2881 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   2882 ; AVX2:       # BB#0:
   2883 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
   2884 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
   2885 ; AVX2-NEXT:    retq
   2886   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
   2887   ret <16 x i16> %shuffle
   2888 }
   2889 
   2890 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
   2891 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   2892 ; AVX1:       # BB#0:
   2893 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2894 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   2895 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2896 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
   2897 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
   2898 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
   2899 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   2900 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   2901 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2902 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2903 ; AVX1-NEXT:    retq
   2904 ;
   2905 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   2906 ; AVX2:       # BB#0:
   2907 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
   2908 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   2909 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
   2910 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
   2911 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
   2912 ; AVX2-NEXT:    retq
   2913   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
   2914   ret <16 x i16> %shuffle
   2915 }
   2916 
   2917 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
   2918 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   2919 ; AVX1:       # BB#0:
   2920 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2921 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   2922 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   2923 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2924 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
   2925 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
   2926 ; AVX1-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
   2927 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   2928 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   2929 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2930 ; AVX1-NEXT:    retq
   2931 ;
   2932 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   2933 ; AVX2:       # BB#0:
   2934 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
   2935 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
   2936 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2937 ; AVX2-NEXT:    retq
   2938   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
   2939   ret <16 x i16> %shuffle
   2940 }
   2941 
   2942 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
   2943 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   2944 ; AVX1:       # BB#0:
   2945 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2946 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2947 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   2948 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   2949 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   2950 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   2951 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
   2952 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2953 ; AVX1-NEXT:    retq
   2954 ;
   2955 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   2956 ; AVX2:       # BB#0:
   2957 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
   2958 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2959 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   2960 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2961 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2962 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2963 ; AVX2-NEXT:    retq
   2964   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
   2965   ret <16 x i16> %shuffle
   2966 }
   2967 
   2968 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   2969 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   2970 ; AVX1:       # BB#0:
   2971 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2972 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2973 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   2974 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   2975 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2976 ; AVX1-NEXT:    retq
   2977 ;
   2978 ; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   2979 ; AVX2:       # BB#0:
   2980 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
   2981 ; AVX2-NEXT:    retq
   2982   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   2983   ret <16 x i16> %shuffle
   2984 }
   2985 
   2986 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
   2987 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   2988 ; AVX1:       # BB#0:
   2989 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2990 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   2991 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2992 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2993 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2994 ; AVX1-NEXT:    retq
   2995 ;
   2996 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   2997 ; AVX2:       # BB#0:
   2998 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2999 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3000 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3001 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3002 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3003 ; AVX2-NEXT:    retq
   3004   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
   3005   ret <16 x i16> %shuffle
   3006 }
   3007 
   3008 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3009 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3010 ; AVX1:       # BB#0:
   3011 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3012 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3013 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3014 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3015 ; AVX1-NEXT:    retq
   3016 ;
   3017 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3018 ; AVX2:       # BB#0:
   3019 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
   3020 ; AVX2-NEXT:    retq
   3021   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3022   ret <16 x i16> %shuffle
   3023 }
   3024 
   3025 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3026 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3027 ; AVX1:       # BB#0:
   3028 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3029 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3030 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3031 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3032 ; AVX1-NEXT:    retq
   3033 ;
   3034 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3035 ; AVX2:       # BB#0:
   3036 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
   3037 ; AVX2-NEXT:    retq
   3038   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3039   ret <16 x i16> %shuffle
   3040 }
   3041 
   3042 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
   3043 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3044 ; AVX1:       # BB#0:
   3045 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3046 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3047 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3048 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
   3049 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   3050 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3051 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
   3052 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3053 ; AVX1-NEXT:    retq
   3054 ;
   3055 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3056 ; AVX2:       # BB#0:
   3057 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
   3058 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3059 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3060 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3061 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3062 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3063 ; AVX2-NEXT:    retq
   3064   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
   3065   ret <16 x i16> %shuffle
   3066 }
   3067 
   3068 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
   3069 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   3070 ; AVX1:       # BB#0:
   3071 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3072 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3073 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3074 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   3075 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3076 ; AVX1-NEXT:    retq
   3077 ;
   3078 ; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   3079 ; AVX2:       # BB#0:
   3080 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
   3081 ; AVX2-NEXT:    retq
   3082   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
   3083   ret <16 x i16> %shuffle
   3084 }
   3085 
   3086 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
   3087 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   3088 ; AVX1:       # BB#0:
   3089 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3090 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3091 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3092 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3093 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3094 ; AVX1-NEXT:    retq
   3095 ;
   3096 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   3097 ; AVX2:       # BB#0:
   3098 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3099 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3100 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3101 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3102 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3103 ; AVX2-NEXT:    retq
   3104   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
   3105   ret <16 x i16> %shuffle
   3106 }
   3107 
   3108 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
   3109 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   3110 ; AVX1:       # BB#0:
   3111 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3112 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3113 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3114 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3115 ; AVX1-NEXT:    retq
   3116 ;
   3117 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   3118 ; AVX2:       # BB#0:
   3119 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
   3120 ; AVX2-NEXT:    retq
   3121   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
   3122   ret <16 x i16> %shuffle
   3123 }
   3124 
   3125 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3126 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   3127 ; AVX1:       # BB#0:
   3128 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3129 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3130 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3131 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3132 ; AVX1-NEXT:    retq
   3133 ;
   3134 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   3135 ; AVX2:       # BB#0:
   3136 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
   3137 ; AVX2-NEXT:    retq
   3138   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
   3139   ret <16 x i16> %shuffle
   3140 }
   3141 
   3142 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
   3143 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   3144 ; AVX1:       # BB#0:
   3145 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3146 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3147 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3148 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   3149 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   3150 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3151 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
   3152 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3153 ; AVX1-NEXT:    retq
   3154 ;
   3155 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   3156 ; AVX2:       # BB#0:
   3157 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
   3158 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3159 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3160 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3161 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3162 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3163 ; AVX2-NEXT:    retq
   3164   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
   3165   ret <16 x i16> %shuffle
   3166 }
   3167 
   3168 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
   3169 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   3170 ; AVX1:       # BB#0:
   3171 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3172 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3173 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3174 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   3175 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3176 ; AVX1-NEXT:    retq
   3177 ;
   3178 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   3179 ; AVX2:       # BB#0:
   3180 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
   3181 ; AVX2-NEXT:    retq
   3182   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
   3183   ret <16 x i16> %shuffle
   3184 }
   3185 
   3186 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
   3187 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   3188 ; AVX1:       # BB#0:
   3189 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3190 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3191 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3192 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
   3193 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
   3194 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   3195 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7]
   3196 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3197 ; AVX1-NEXT:    retq
   3198 ;
   3199 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   3200 ; AVX2:       # BB#0:
   3201 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
   3202 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3203 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3204 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3205 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3206 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3207 ; AVX2-NEXT:    retq
   3208   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
   3209   ret <16 x i16> %shuffle
   3210 }
   3211 
   3212 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
   3213 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   3214 ; AVX1:       # BB#0:
   3215 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3216 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3217 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3218 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   3219 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3220 ; AVX1-NEXT:    retq
   3221 ;
   3222 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   3223 ; AVX2:       # BB#0:
   3224 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
   3225 ; AVX2-NEXT:    retq
   3226   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
   3227   ret <16 x i16> %shuffle
   3228 }
   3229 
   3230 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
   3231 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   3232 ; AVX1:       # BB#0:
   3233 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3234 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   3235 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3236 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
   3237 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   3238 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   3239 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   3240 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   3241 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3242 ; AVX1-NEXT:    retq
   3243 ;
   3244 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   3245 ; AVX2:       # BB#0:
   3246 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
   3247 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
   3248 ; AVX2-NEXT:    retq
   3249   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
   3250   ret <16 x i16> %shuffle
   3251 }
   3252 
   3253 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
   3254 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
   3255 ; ALL:       # BB#0:
   3256 ; ALL-NEXT:    movzwl (%rdi), %eax
   3257 ; ALL-NEXT:    vmovd %eax, %xmm0
   3258 ; ALL-NEXT:    retq
   3259   %val = load i16, i16* %ptr
   3260   %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
   3261   ret <16 x i16> %i0
   3262 }
   3263 
   3264