Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
      3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
      4 
      5 target triple = "x86_64-unknown-unknown"
      6 
      7 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; full splat - every result lane takes %a[0]; %b is unused
      8 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
      9 ; AVX1:       # BB#0:
     10 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     11 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     12 ; AVX1-NEXT:    retq
     13 ;
     14 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     15 ; AVX2:       # BB#0:
     16 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
     17 ; AVX2-NEXT:    retq
     18   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     19   ret <16 x i16> %shuffle
     20 }
     21 
     22 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 14 takes %a[1], every other lane takes %a[0]; %b is unused
     23 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     24 ; AVX1:       # BB#0:
     25 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
     26 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     27 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
     28 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
     29 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     30 ; AVX1-NEXT:    retq
     31 ;
     32 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     33 ; AVX2:       # BB#0:
     34 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     35 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
     36 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     37 ; AVX2-NEXT:    retq
     38   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     39   ret <16 x i16> %shuffle
     40 }
     41 
     42 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 13 takes %a[2], every other lane takes %a[0]; %b is unused
     43 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     44 ; AVX1:       # BB#0:
     45 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     46 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
     47 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     48 ; AVX1-NEXT:    retq
     49 ;
     50 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     51 ; AVX2:       # BB#0:
     52 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     53 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
     54 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     55 ; AVX2-NEXT:    retq
     56   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     57   ret <16 x i16> %shuffle
     58 }
     59 
     60 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 12 takes %a[3], every other lane takes %a[0]; %b is unused
     61 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
     62 ; AVX1:       # BB#0:
     63 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     64 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
     65 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     66 ; AVX1-NEXT:    retq
     67 ;
     68 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
     69 ; AVX2:       # BB#0:
     70 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     71 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
     72 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     73 ; AVX2-NEXT:    retq
     74   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
     75   ret <16 x i16> %shuffle
     76 }
     77 
     78 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 11 takes %a[4], every other lane takes %a[0]; %b is unused
     79 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
     80 ; AVX1:       # BB#0:
     81 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
     82 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
     83 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     84 ; AVX1-NEXT:    retq
     85 ;
     86 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
     87 ; AVX2:       # BB#0:
     88 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
     89 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
     90 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
     91 ; AVX2-NEXT:    retq
     92   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
     93   ret <16 x i16> %shuffle
     94 }
     95 
     96 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 10 takes %a[5], every other lane takes %a[0]; %b is unused
     97 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
     98 ; AVX1:       # BB#0:
     99 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    100 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    101 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    102 ; AVX1-NEXT:    retq
    103 ;
    104 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
    105 ; AVX2:       # BB#0:
    106 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
    107 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    108 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    109 ; AVX2-NEXT:    retq
    110   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    111   ret <16 x i16> %shuffle
    112 }
    113 
    114 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 9 takes %a[6], every other lane takes %a[0]; %b is unused
    115 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    116 ; AVX1:       # BB#0:
    117 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    118 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    119 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    120 ; AVX1-NEXT:    retq
    121 ;
    122 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    123 ; AVX2:       # BB#0:
    124 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
    125 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    126 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    127 ; AVX2-NEXT:    retq
    128   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    129   ret <16 x i16> %shuffle
    130 }
    131 
    132 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 8 takes %a[7], every other lane takes %a[0]; %b is unused
    133 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    134 ; AVX1:       # BB#0:
    135 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    136 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    137 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    138 ; AVX1-NEXT:    retq
    139 ;
    140 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    141 ; AVX2:       # BB#0:
    142 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
    143 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    144 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    145 ; AVX2-NEXT:    retq
    146   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    147   ret <16 x i16> %shuffle
    148 }
    149 
    150 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 7 takes %a[8] (first element of the upper 128 bits), every other lane takes %a[0]; %b is unused
    151 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    152 ; AVX1:       # BB#0:
    153 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    154 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    155 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
    156 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    157 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    158 ; AVX1-NEXT:    retq
    159 ;
    160 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    161 ; AVX2:       # BB#0:
    162 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    163 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    164 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
    165 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
    166 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
    167 ; AVX2-NEXT:    retq
    168   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    169   ret <16 x i16> %shuffle
    170 }
    171 
    172 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 6 takes %a[9] (from the upper 128 bits), every other lane takes %a[0]; %b is unused
    173 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
    174 ; AVX1:       # BB#0:
    175 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    176 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    177 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
    178 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    179 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    180 ; AVX1-NEXT:    retq
    181 ;
    182 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
    183 ; AVX2:       # BB#0:
    184 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    185 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
    186 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
    187 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    188 ; AVX2-NEXT:    retq
    189   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    190   ret <16 x i16> %shuffle
    191 }
    192 
    193 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 5 takes %a[10] (from the upper 128 bits), every other lane takes %a[0]; %b is unused
    194 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    195 ; AVX1:       # BB#0:
    196 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    197 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    198 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
    199 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    200 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    201 ; AVX1-NEXT:    retq
    202 ;
    203 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    204 ; AVX2:       # BB#0:
    205 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    206 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    207 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    208 ; AVX2-NEXT:    retq
    209   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    210   ret <16 x i16> %shuffle
    211 }
    212 
    213 define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 4 takes %a[11] (from the upper 128 bits), every other lane takes %a[0]; %b is unused
    214 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    215 ; AVX1:       # BB#0:
    216 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    217 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    218 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
    219 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    220 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    221 ; AVX1-NEXT:    retq
    222 ;
    223 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    224 ; AVX2:       # BB#0:
    225 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    226 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    227 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    228 ; AVX2-NEXT:    retq
    229   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    230   ret <16 x i16> %shuffle
    231 }
    232 
    233 define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 3 takes %a[12] (from the upper 128 bits), every other lane takes %a[0]; %b is unused
    234 ; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
    235 ; AVX1:       # BB#0:
    236 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    237 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    238 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    239 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    240 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    241 ; AVX1-NEXT:    retq
    242 ;
    243 ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
    244 ; AVX2:       # BB#0:
    245 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    246 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    247 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    248 ; AVX2-NEXT:    retq
    249   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    250   ret <16 x i16> %shuffle
    251 }
    252 
    253 define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 2 takes %a[13] (from the upper 128 bits), every other lane takes %a[0]; %b is unused
    254 ; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
    255 ; AVX1:       # BB#0:
    256 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    257 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    258 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    259 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    260 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    261 ; AVX1-NEXT:    retq
    262 ;
    263 ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
    264 ; AVX2:       # BB#0:
    265 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    266 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    267 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    268 ; AVX2-NEXT:    retq
    269   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    270   ret <16 x i16> %shuffle
    271 }
    272 
    273 define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 1 takes %a[14] (from the upper 128 bits), every other lane takes %a[0]; %b is unused
    274 ; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    275 ; AVX1:       # BB#0:
    276 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    277 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    278 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    279 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    280 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    281 ; AVX1-NEXT:    retq
    282 ;
    283 ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    284 ; AVX2:       # BB#0:
    285 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    286 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    287 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    288 ; AVX2-NEXT:    retq
    289   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    290   ret <16 x i16> %shuffle
    291 }
    292 
    293 define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; result lane 0 takes %a[15] (last element of the upper 128 bits), every other lane takes %a[0]; %b is unused
    294 ; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    295 ; AVX1:       # BB#0:
    296 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    297 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    298 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    299 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    300 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    301 ; AVX1-NEXT:    retq
    302 ;
    303 ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    304 ; AVX2:       # BB#0:
    305 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    306 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    307 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    308 ; AVX2-NEXT:    retq
    309   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    310   ret <16 x i16> %shuffle
    311 }
    312 
    313 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { ; per-half splat of the first element - lanes 0-7 take %a[0], lanes 8-15 take %a[8]; %b is unused
    314 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    315 ; AVX1:       # BB#0:
    316 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    317 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    318 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    319 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    320 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    321 ; AVX1-NEXT:    retq
    322 ;
    323 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    324 ; AVX2:       # BB#0:
    325 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    326 ; AVX2-NEXT:    retq
    327   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    328   ret <16 x i16> %shuffle
    329 }
    330 
    331 define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) { ; per-half splat of the last element - lanes 0-7 take %a[7], lanes 8-15 take %a[15]; %b is unused
    332 ; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    333 ; AVX1:       # BB#0:
    334 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    335 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
    336 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    337 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    338 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    339 ; AVX1-NEXT:    retq
    340 ;
    341 ; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    342 ; AVX2:       # BB#0:
    343 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
    344 ; AVX2-NEXT:    retq
    345   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
    346   ret <16 x i16> %shuffle
    347 }
    348 
    349 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { ; each group of 4 lanes is filled with that group's first element of %a (0, 4, 8, 12); %b is unused
    350 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    351 ; AVX1:       # BB#0:
    352 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
    353 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
    354 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    355 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    356 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    357 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    358 ; AVX1-NEXT:    retq
    359 ;
    360 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    361 ; AVX2:       # BB#0:
    362 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
    363 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
    364 ; AVX2-NEXT:    retq
    365   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
    366   ret <16 x i16> %shuffle
    367 }
    368 
    369 define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) { ; each group of 4 lanes is filled with that group's last element of %a (3, 7, 11, 15); %b is unused
    370 ; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    371 ; AVX1:       # BB#0:
    372 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
    373 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
    374 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    375 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
    376 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
    377 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    378 ; AVX1-NEXT:    retq
    379 ;
    380 ; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    381 ; AVX2:       # BB#0:
    382 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
    383 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
    384 ; AVX2-NEXT:    retq
    385   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
    386   ret <16 x i16> %shuffle
    387 }
    388 
    389 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) { ; each even-indexed element of %a is duplicated into the odd lane that follows it; %b is unused
    390 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    391 ; AVX1:       # BB#0:
    392 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
    393 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
    394 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    395 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
    396 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
    397 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    398 ; AVX1-NEXT:    retq
    399 ;
    400 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    401 ; AVX2:       # BB#0:
    402 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
    403 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
    404 ; AVX2-NEXT:    retq
    405   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
    406   ret <16 x i16> %shuffle
    407 }
    408 
    409 define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) { ; each odd-indexed element of %a is duplicated into the even lane that precedes it; %b is unused
    410 ; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    411 ; AVX1:       # BB#0:
    412 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
    413 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
    414 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    415 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
    416 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
    417 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    418 ; AVX1-NEXT:    retq
    419 ;
    420 ; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    421 ; AVX2:       # BB#0:
    422 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
    423 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
    424 ; AVX2-NEXT:    retq
    425   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
    426   ret <16 x i16> %shuffle
    427 }
    428 
    429 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) { ; same 8-lane pattern in both halves - lanes 6 and 14 take %a[1], every other lane takes %a[0]; %b is unused
    430 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    431 ; AVX1:       # BB#0:
    432 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    433 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    434 ; AVX1-NEXT:    retq
    435 ;
    436 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    437 ; AVX2:       # BB#0:
    438 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    439 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    440 ; AVX2-NEXT:    retq
    441   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
    442   ret <16 x i16> %shuffle
    443 }
    444 
    445 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) { ; same 8-lane pattern in both halves - lanes 5 and 13 take %a[2], every other lane takes %a[0]; %b is unused
    446 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    447 ; AVX1:       # BB#0:
    448 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    449 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    450 ; AVX1-NEXT:    retq
    451 ;
    452 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    453 ; AVX2:       # BB#0:
    454 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    455 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    456 ; AVX2-NEXT:    retq
    457   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
    458   ret <16 x i16> %shuffle
    459 }
    460 
    461 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; same 8-lane pattern in both halves - lanes 4 and 12 take %a[3], every other lane takes %a[0]; %b is unused
    462 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    463 ; AVX1:       # BB#0:
    464 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    465 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    466 ; AVX1-NEXT:    retq
    467 ;
    468 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    469 ; AVX2:       # BB#0:
    470 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    471 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    472 ; AVX2-NEXT:    retq
    473   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    474   ret <16 x i16> %shuffle
    475 }
    476 
    477 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; same 8-lane pattern in both halves - lanes 3 and 11 take %a[4], every other lane takes %a[0]; %b is unused
    478 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
    479 ; AVX1:       # BB#0:
    480 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    481 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    482 ; AVX1-NEXT:    retq
    483 ;
    484 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
    485 ; AVX2:       # BB#0:
    486 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    487 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    488 ; AVX2-NEXT:    retq
    489   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    490   ret <16 x i16> %shuffle
    491 }
    492 
    493 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; same 8-lane pattern in both halves - lanes 2 and 10 take %a[5], every other lane takes %a[0]; %b is unused
    494 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
    495 ; AVX1:       # BB#0:
    496 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    497 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    498 ; AVX1-NEXT:    retq
    499 ;
    500 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
    501 ; AVX2:       # BB#0:
    502 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    503 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    504 ; AVX2-NEXT:    retq
    505   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    506   ret <16 x i16> %shuffle
    507 }
    508 
    509 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { ; same 8-lane pattern in both halves - lanes 1 and 9 take %a[6], every other lane takes %a[0]; %b is unused
    510 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    511 ; AVX1:       # BB#0:
    512 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    513 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    514 ; AVX1-NEXT:    retq
    515 ;
    516 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    517 ; AVX2:       # BB#0:
    518 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    519 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    520 ; AVX2-NEXT:    retq
    521   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    522   ret <16 x i16> %shuffle
    523 }
    524 
    525 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    526 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    527 ; AVX1:       # BB#0:
    528 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    529 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    530 ; AVX1-NEXT:    retq
    531 ;
    532 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    533 ; AVX2:       # BB#0:
    534 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    535 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    536 ; AVX2-NEXT:    retq
          ; Only %a is referenced (all mask indices < 16). Both 8-element halves of the
          ; result are <a7,a0,a0,a0,a0,a0,a0,a0>, built from the low half of %a only.
    537   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    538   ret <16 x i16> %shuffle
    539 }
    540 
    541 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
    542 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    543 ; AVX1:       # BB#0:
    544 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    545 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    546 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    547 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
    548 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    549 ; AVX1-NEXT:    retq
    550 ;
    551 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    552 ; AVX2:       # BB#0:
    553 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    554 ; AVX2-NEXT:    retq
          ; In-place blend: even result elements come from %a (indices 0-15) and odd
          ; result elements from %b (indices 16-31); no element changes position.
    555   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    556   ret <16 x i16> %shuffle
    557 }
    558 
    559 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
    560 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
    561 ; AVX1:       # BB#0:
    562 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    563 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    564 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    565 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
    566 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    567 ; AVX1-NEXT:    retq
    568 ;
    569 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
    570 ; AVX2:       # BB#0:
    571 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
    572 ; AVX2-NEXT:    retq
          ; In-place blend: even result elements come from %b (indices 16-31) and odd
          ; result elements from %a (indices 0-15); no element changes position.
    573   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
    574   ret <16 x i16> %shuffle
    575 }
    576 
    577 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
    578 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
    579 ; AVX1:       # BB#0:
    580 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    581 ; AVX1-NEXT:    retq
    582 ;
    583 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
    584 ; AVX2:       # BB#0:
    585 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    586 ; AVX2-NEXT:    retq
          ; In-place blend in adjacent i16 pairs: pairs alternate %a, %b, %a, %b, ...
          ; so the CHECK lines expect a single blend at 32-bit element granularity.
    587   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
    588   ret <16 x i16> %shuffle
    589 }
    590 
    591 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    592 ; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
    593 ; AVX1:       # BB#0:
    594 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
    595 ; AVX1-NEXT:    retq
    596 ;
    597 ; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
    598 ; AVX2:       # BB#0:
    599 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    600 ; AVX2-NEXT:    retq
          ; In-place blend in groups of four i16: groups alternate %b, %a, %b, %a,
          ; so the CHECK lines expect a single blend of 64-bit-wide chunks.
    601   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
    602   ret <16 x i16> %shuffle
    603 }
    604 
    605 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
    606 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
    607 ; AVX1:       # BB#0:
    608 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
    609 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    610 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
    611 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    612 ; AVX1-NEXT:    retq
    613 ;
    614 ; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
    615 ; AVX2:       # BB#0:
    616 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
    617 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    618 ; AVX2-NEXT:    retq
          ; Identity on %a for elements 0-14; only the last element is replaced, by
          ; the last element of %b (index 31).
    619   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
    620   ret <16 x i16> %shuffle
    621 }
    622 
    623 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    624 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
    625 ; AVX1:       # BB#0:
    626 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
    627 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    628 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    629 ; AVX1-NEXT:    retq
    630 ;
    631 ; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
    632 ; AVX2:       # BB#0:
    633 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
    634 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    635 ; AVX2-NEXT:    retq
          ; Identity on %a for elements 1-15; only element 0 is replaced, by the
          ; first element of %b (index 16).
    636   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    637   ret <16 x i16> %shuffle
    638 }
    639 
    640 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
    641 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
    642 ; AVX1:       # BB#0:
    643 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    644 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    645 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    646 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
    647 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    648 ; AVX1-NEXT:    retq
    649 ;
    650 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
    651 ; AVX2:       # BB#0:
    652 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
    653 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    654 ; AVX2-NEXT:    retq
          ; In-place blend whose pattern differs per half: elements 0-7 take even
          ; positions from %a and odd from %b; elements 8-15 take even from %b and
          ; odd from %a. The AVX2 CHECK lines expect a constant-mask vpblendvb.
    655   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
    656   ret <16 x i16> %shuffle
    657 }
    658 
    659 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
    660 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
    661 ; AVX1:       # BB#0:
    662 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    663 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    664 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
    665 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
    666 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    667 ; AVX1-NEXT:    retq
    668 ;
    669 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
    670 ; AVX2:       # BB#0:
    671 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
    672 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    673 ; AVX2-NEXT:    retq
          ; In-place blend whose pattern differs per half: elements 0-7 take even
          ; positions from %b and odd from %a; elements 8-15 take even from %a and
          ; odd from %b. The AVX2 CHECK lines expect a constant-mask vpblendvb.
    674   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    675   ret <16 x i16> %shuffle
    676 }
    677 
    678 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
    679 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
    680 ; AVX1:       # BB#0:
    681 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
    682 ; AVX1-NEXT:    retq
    683 ;
    684 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
    685 ; AVX2:       # BB#0:
    686 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
    687 ; AVX2-NEXT:    retq
          ; In-place blend in adjacent i16 pairs with a different pattern in each
          ; half: the pairs come from %a,%b,%b,%a then %a,%b,%a,%b.
    688   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
    689   ret <16 x i16> %shuffle
    690 }
    691 
    692 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
    693 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
    694 ; AVX1:       # BB#0:
    695 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    696 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    697 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    698 ; AVX1-NEXT:    retq
    699 ;
    700 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
    701 ; AVX2:       # BB#0:
    702 ; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
    703 ; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
    704 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    705 ; AVX2-NEXT:    retq
          ; The result repeats the pair <a0, b0> eight times: even positions hold
          ; element 0 of %a and odd positions hold element 0 of %b (index 16).
    706   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
    707   ret <16 x i16> %shuffle
    708 }
    709 
    710 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
    711 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
    712 ; AVX1:       # BB#0:
    713 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    714 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
    715 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
    716 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    717 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    718 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    719 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    720 ; AVX1-NEXT:    retq
    721 ;
    722 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
    723 ; AVX2:       # BB#0:
    724 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17]
    725 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
    726 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    727 ; AVX2-NEXT:    retq
          ; Per-half variant of the <a0, b0> pair splat: elements 0-7 repeat the
          ; pair <a0, b0>, elements 8-15 repeat the pair <a8, b8> (indices 8 and 24).
    728   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
    729   ret <16 x i16> %shuffle
    730 }
    731 
    732 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    733 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
    734 ; AVX1:       # BB#0:
    735 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    736 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
    737 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    738 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
    739 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
    740 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    741 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    742 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    743 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    744 ; AVX1-NEXT:    retq
    745 ;
    746 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
    747 ; AVX2:       # BB#0:
    748 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
    749 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    750 ; AVX2-NEXT:    retq
          ; Each 8-element half holds four copies of the first element of %b's
          ; matching half (b0 / b8), followed by the upper four elements of %a's
          ; matching half left in place (a4-a7 / a12-a15).
    751   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
    752   ret <16 x i16> %shuffle
    753 }
    754 
    755 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
    756 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
    757 ; AVX1:       # BB#0:
    758 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    759 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
    760 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    761 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
    762 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    763 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
    764 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    765 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
    766 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    767 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    768 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    769 ; AVX1-NEXT:    retq
    770 ;
    771 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
    772 ; AVX2:       # BB#0:
    773 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    774 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
    775 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
    776 ; AVX2-NEXT:    retq
          ; Each 8-element half holds the lower four elements of %b's matching half
          ; in reverse order, then the upper four elements of %a's matching half in
          ; reverse order.
    777   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
    778   ret <16 x i16> %shuffle
    779 }
    780 
    781 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
    782 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
    783 ; AVX1:       # BB#0:
    784 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    785 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
    786 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
    787 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
    788 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
    789 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    790 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
    791 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    792 ; AVX1-NEXT:    retq
    793 ;
    794 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
    795 ; AVX2:       # BB#0:
    796 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
    797 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
    798 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
    799 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    800 ; AVX2-NEXT:    retq
          ; Each 8-element half holds the lower four elements of %b's matching half
          ; in reverse order, then the lower four elements of %a's matching half in
          ; reverse order (so %a's elements also move to the upper positions).
    801   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
    802   ret <16 x i16> %shuffle
    803 }
    804 
    805 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
    806 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
    807 ; AVX1:       # BB#0:
    808 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    809 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    810 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    811 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    812 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    813 ; AVX1-NEXT:    retq
    814 ;
    815 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
    816 ; AVX2:       # BB#0:
    817 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
    818 ; AVX2-NEXT:    retq
          ; Only %a is referenced. The same pattern is applied to each 8-element
          ; half of %a independently: the half's first element everywhere, except
          ; position 6 which takes the half's element 1.
    819   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
    820   ret <16 x i16> %shuffle
    821 }
    822 
    823 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
    824 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
    825 ; AVX1:       # BB#0:
    826 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    827 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    828 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    829 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    830 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    831 ; AVX1-NEXT:    retq
    832 ;
    833 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
    834 ; AVX2:       # BB#0:
    835 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
    836 ; AVX2-NEXT:    retq
          ; Only %a is referenced. The same pattern is applied to each 8-element
          ; half of %a independently: the half's first element everywhere, except
          ; position 5 which takes the half's element 2.
    837   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
    838   ret <16 x i16> %shuffle
    839 }
    840 
    841 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    842 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
    843 ; AVX1:       # BB#0:
    844 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    845 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    846 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    847 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    848 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    849 ; AVX1-NEXT:    retq
    850 ;
    851 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
    852 ; AVX2:       # BB#0:
    853 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
    854 ; AVX2-NEXT:    retq
          ; Only %a is referenced. The same pattern is applied to each 8-element
          ; half of %a independently: the half's first element everywhere, except
          ; position 4 which takes the half's element 3.
    855   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
    856   ret <16 x i16> %shuffle
    857 }
    858 
    859 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    860 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
    861 ; AVX1:       # BB#0:
    862 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    863 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    864 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    865 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    866 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    867 ; AVX1-NEXT:    retq
    868 ;
    869 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
    870 ; AVX2:       # BB#0:
    871 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
    872 ; AVX2-NEXT:    retq
          ; Only %a is referenced. The same pattern is applied to each 8-element
          ; half of %a independently: the half's first element everywhere, except
          ; position 3 which takes the half's element 4.
    873   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
    874   ret <16 x i16> %shuffle
    875 }
    876 
    877 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    878 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
    879 ; AVX1:       # BB#0:
    880 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    881 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    882 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    883 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    884 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    885 ; AVX1-NEXT:    retq
    886 ;
    887 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
    888 ; AVX2:       # BB#0:
    889 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
    890 ; AVX2-NEXT:    retq
          ; Only %a is referenced. The same pattern is applied to each 8-element
          ; half of %a independently: the half's first element everywhere, except
          ; position 2 which takes the half's element 5.
    891   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
    892   ret <16 x i16> %shuffle
    893 }
    894 
    895 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    896 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
    897 ; AVX1:       # BB#0:
    898 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    899 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    900 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    901 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    902 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    903 ; AVX1-NEXT:    retq
    904 ;
    905 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
    906 ; AVX2:       # BB#0:
    907 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
    908 ; AVX2-NEXT:    retq
          ; Only %a is referenced. The same pattern is applied to each 8-element
          ; half of %a independently: the half's first element everywhere, except
          ; position 1 which takes the half's element 6.
    909   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    910   ret <16 x i16> %shuffle
    911 }
    912 
    913 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    914 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
    915 ; AVX1:       # BB#0:
    916 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    917 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    918 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    919 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    920 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    921 ; AVX1-NEXT:    retq
    922 ;
    923 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
    924 ; AVX2:       # BB#0:
    925 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    926 ; AVX2-NEXT:    retq
          ; Only %a is referenced. The same pattern is applied to each 8-element
          ; half of %a independently: the half's first element everywhere, except
          ; position 0 which takes the half's element 7.
    927   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    928   ret <16 x i16> %shuffle
    929 }
    930 
    931 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
    932 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
    933 ; AVX1:       # BB#0:
    934 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    935 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    936 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
    937 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    938 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    939 ; AVX1-NEXT:    retq
    940 ;
    941 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
    942 ; AVX2:       # BB#0:
    943 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
    944 ; AVX2-NEXT:    retq
          ; Each 8-element half interleaves the lower four elements of %a's
          ; matching half with the lower four elements of %b's matching half.
    945   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
    946   ret <16 x i16> %shuffle
    947 }
    948 
    949 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
    950 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
    951 ; AVX1:       # BB#0:
    952 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    953 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    954 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    955 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    956 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    957 ; AVX1-NEXT:    retq
    958 ;
    959 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
    960 ; AVX2:       # BB#0:
    961 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
    962 ; AVX2-NEXT:    retq
          ; Each 8-element half interleaves the upper four elements of %a's
          ; matching half with the upper four elements of %b's matching half.
    963   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    964   ret <16 x i16> %shuffle
    965 }
    966 
    967 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
    968 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
    969 ; AVX1:       # BB#0:
    970 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    971 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    972 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    973 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    974 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    975 ; AVX1-NEXT:    retq
    976 ;
    977 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
    978 ; AVX2:       # BB#0:
    979 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31]
    980 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
    981 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    982 ; AVX2-NEXT:    retq
          ; Mixed interleave: elements 0-7 interleave the lower four elements of
          ; the first halves of %a and %b; elements 8-15 interleave the upper four
          ; elements of the second halves of %a and %b.
    983   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    984   ret <16 x i16> %shuffle
    985 }
    986 
    987 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
    988 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
    989 ; AVX1:       # BB#0:
    990 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    991 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
    992 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
    993 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    994 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    995 ; AVX1-NEXT:    retq
    996 ;
    997 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
    998 ; AVX2:       # BB#0:
    999 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
   1000 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
   1001 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   1002 ; AVX2-NEXT:    retq
          ; Mixed interleave: elements 0-7 interleave the upper four elements of
          ; the first halves of %a and %b; elements 8-15 interleave the lower four
          ; elements of the second halves of %a and %b.
   1003   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   1004   ret <16 x i16> %shuffle
   1005 }
   1006 
   1007 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Each 8-element half is a splat of its first element
        ; (0 or 8), except result slot 6 takes element 1 and slot 9 takes element 9.
   1008 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1009 ; AVX1:       # BB#0:
   1010 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
   1011 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1012 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
   1013 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1014 ; AVX1-NEXT:    retq
   1015 ;
   1016 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1017 ; AVX2:       # BB#0:
   1018 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
   1019 ; AVX2-NEXT:    retq
   1020   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1021   ret <16 x i16> %shuffle
   1022 }
   1023 
   1024 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Each 8-element half is a splat of its first element
        ; (0 or 8), except result slot 5 takes element 2 and slot 10 takes element 10.
   1025 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1026 ; AVX1:       # BB#0:
   1027 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
   1028 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1029 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
   1030 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1031 ; AVX1-NEXT:    retq
   1032 ;
   1033 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1034 ; AVX2:       # BB#0:
   1035 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
   1036 ; AVX2-NEXT:    retq
   1037   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
   1038   ret <16 x i16> %shuffle
   1039 }
   1040 
   1041 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Each 8-element half is a splat of its first element
        ; (0 or 8), except result slot 4 takes element 3 and slot 11 takes element 11.
   1042 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1043 ; AVX1:       # BB#0:
   1044 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
   1045 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1046 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
   1047 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1048 ; AVX1-NEXT:    retq
   1049 ;
   1050 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1051 ; AVX2:       # BB#0:
   1052 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
   1053 ; AVX2-NEXT:    retq
   1054   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
   1055   ret <16 x i16> %shuffle
   1056 }
   1057 
   1058 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Each 8-element half is a splat of its first element
        ; (0 or 8), except result slot 3 takes element 4 and slot 12 takes element 12.
   1059 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1060 ; AVX1:       # BB#0:
   1061 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
   1062 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1063 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
   1064 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1065 ; AVX1-NEXT:    retq
   1066 ;
   1067 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1068 ; AVX2:       # BB#0:
   1069 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
   1070 ; AVX2-NEXT:    retq
   1071   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
   1072   ret <16 x i16> %shuffle
   1073 }
   1074 
   1075 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Each 8-element half is a splat of its first element
        ; (0 or 8), except result slot 2 takes element 5 and slot 13 takes element 13.
   1076 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1077 ; AVX1:       # BB#0:
   1078 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
   1079 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1080 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
   1081 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1082 ; AVX1-NEXT:    retq
   1083 ;
   1084 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1085 ; AVX2:       # BB#0:
   1086 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
   1087 ; AVX2-NEXT:    retq
   1088   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
   1089   ret <16 x i16> %shuffle
   1090 }
   1091 
   1092 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Each 8-element half is a splat of its first element
        ; (0 or 8), except result slot 1 takes element 6 and slot 14 takes element 14.
   1093 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1094 ; AVX1:       # BB#0:
   1095 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
   1096 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1097 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
   1098 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1099 ; AVX1-NEXT:    retq
   1100 ;
   1101 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1102 ; AVX2:       # BB#0:
   1103 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
   1104 ; AVX2-NEXT:    retq
   1105   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
   1106   ret <16 x i16> %shuffle
   1107 }
   1108 
   1109 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Result slot 0 takes element 7 and slot 15 takes
        ; element 15; slots 1..7 are element 0 and slots 8..14 are element 8.
   1110 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1111 ; AVX1:       # BB#0:
   1112 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1113 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1114 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
   1115 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1116 ; AVX1-NEXT:    retq
   1117 ;
   1118 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1119 ; AVX2:       # BB#0:
   1120 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
   1121 ; AVX2-NEXT:    retq
   1122   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
   1123   ret <16 x i16> %shuffle
   1124 }
   1125 
   1126 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. The low half duplicates even elements 0, 2, 4, 6 in
        ; order; the high half duplicates even elements 14, 12, 10, 8 (descending).
   1127 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1128 ; AVX1:       # BB#0:
   1129 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
   1130 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1131 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1132 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
   1133 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1134 ; AVX1-NEXT:    retq
   1135 ;
   1136 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1137 ; AVX2:       # BB#0:
   1138 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
   1139 ; AVX2-NEXT:    retq
   1140   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
   1141   ret <16 x i16> %shuffle
   1142 }
   1143 
   1144 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Groups of four identical elements: 4 then 0 in the
        ; low half, 8 then 12 in the high half.
   1145 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1146 ; AVX1:       # BB#0:
   1147 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1148 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1149 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1150 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1151 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1152 ; AVX1-NEXT:    retq
   1153 ;
   1154 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1155 ; AVX2:       # BB#0:
   1156 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1157 ; AVX2-NEXT:    retq
   1158   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   1159   ret <16 x i16> %shuffle
   1160 }
   1161 
   1162 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a and leaves result slots 1, 2, 10 and 11 undef ("uu").
        ; Defined slots are element 0 (low half) or 8 (high half), except slot 14,
        ; which takes element 14.
   1163 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1164 ; AVX1:       # BB#0:
   1165 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
   1166 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1167 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
   1168 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1169 ; AVX1-NEXT:    retq
   1170 ;
   1171 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1172 ; AVX2:       # BB#0:
   1173 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
   1174 ; AVX2-NEXT:    retq
   1175   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
   1176   ret <16 x i16> %shuffle
   1177 }
   1178 
   1179 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a and leaves result slots 1, 10 and 11 undef ("uu").
        ; Slot 0 takes element 7 and slot 15 takes element 15; the remaining
        ; defined slots are element 0 (low half) or 8 (high half).
   1180 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1181 ; AVX1:       # BB#0:
   1182 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
   1183 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1184 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
   1185 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1186 ; AVX1-NEXT:    retq
   1187 ;
   1188 ; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1189 ; AVX2:       # BB#0:
   1190 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
   1191 ; AVX2-NEXT:    retq
   1192   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
   1193   ret <16 x i16> %shuffle
   1194 }
   1195 
   1196 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Even elements are duplicated pairwise (ascending in
        ; the low half, descending in the high half), with result slots 1, 2, 6
        ; and 10 left undef ("uu").
   1197 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1198 ; AVX1:       # BB#0:
   1199 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
   1200 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1201 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1202 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
   1203 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1204 ; AVX1-NEXT:    retq
   1205 ;
   1206 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1207 ; AVX2:       # BB#0:
   1208 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
   1209 ; AVX2-NEXT:    retq
   1210   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
   1211   ret <16 x i16> %shuffle
   1212 }
   1213 
   1214 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
        ; Mask reads only %a. Slots 0..3 take element 4, slots 8..10 take element 8
        ; and slots 13..15 take element 12; slots 4..7, 11 and 12 are undef ("uu").
   1215 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1216 ; AVX1:       # BB#0:
   1217 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3]
   1218 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1219 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1220 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
   1221 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1222 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1223 ; AVX1-NEXT:    retq
   1224 ;
   1225 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1226 ; AVX2:       # BB#0:
   1227 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
   1228 ; AVX2-NEXT:    retq
   1229   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
   1230   ret <16 x i16> %shuffle
   1231 }
   1232 
   1233 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle: the low result half is 4x %a[0] then 4x %a[4]; the
        ; high result half is 4x %b[0] then 4x %b[4] (mask indices 16 and 20).
   1234 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1235 ; AVX1:       # BB#0:
   1236 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1237 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1238 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1239 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1240 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1241 ; AVX1-NEXT:    retq
   1242 ;
   1243 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1244 ; AVX2:       # BB#0:
   1245 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1246 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1247 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1248 ; AVX2-NEXT:    retq
   1249   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1250   ret <16 x i16> %shuffle
   1251 }
   1252 
   1253 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle: the low result half is 4x %a[8] then 4x %a[12] (upper
        ; half of %a); the high result half is 4x %b[0] then 4x %b[4] (lower half
        ; of %b, mask indices 16 and 20).
   1254 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1255 ; AVX1:       # BB#0:
   1256 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1257 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1258 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1259 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1260 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1261 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1262 ; AVX1-NEXT:    retq
   1263 ;
   1264 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1265 ; AVX2:       # BB#0:
   1266 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
   1267 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1268 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1269 ; AVX2-NEXT:    retq
   1270   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1271   ret <16 x i16> %shuffle
   1272 }
   1273 
   1274 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle: the low result half is 4x %a[8] then 4x %a[12]; the
        ; high result half is 4x %b[8] then 4x %b[12] (mask indices 24 and 28).
        ; Both sources are read from their upper halves.
   1275 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1276 ; AVX1:       # BB#0:
   1277 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1278 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1279 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1280 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1281 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1282 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1283 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1284 ; AVX1-NEXT:    retq
   1285 ;
   1286 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1287 ; AVX2:       # BB#0:
   1288 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1289 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1290 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1291 ; AVX2-NEXT:    retq
   1292   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1293   ret <16 x i16> %shuffle
   1294 }
   1295 
   1296 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle: the low result half is 4x %a[0] then 4x %a[4]; the
        ; high result half is 4x %b[8] then 4x %b[12] (mask indices 24 and 28).
   1297 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1298 ; AVX1:       # BB#0:
   1299 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1300 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1301 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1302 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1303 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1304 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1305 ; AVX1-NEXT:    retq
   1306 ;
   1307 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1308 ; AVX2:       # BB#0:
   1309 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1310 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1311 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1312 ; AVX2-NEXT:    retq
   1313   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1314   ret <16 x i16> %shuffle
   1315 }
   1316 
   1317 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input interleave of %a[0..7] with %b[0..7] (mask indices 16..23 are
        ; %b[0..7]). Only the lower eight elements of each source are read, and
        ; they fill all sixteen result slots.
   1318 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1319 ; AVX1:       # BB#0:
   1320 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1321 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1322 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1323 ; AVX1-NEXT:    retq
   1324 ;
   1325 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1326 ; AVX2:       # BB#0:
   1327 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1328 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1329 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1330 ; AVX2-NEXT:    retq
   1331   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1332   ret <16 x i16> %shuffle
   1333 }
   1334 
   1335 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
        ; The first shuffle operand is zeroinitializer and %a is the second, so
        ; mask index 0 yields zero ("zz") and indices 16 / 24 are %a[0] / %a[8].
        ; The result is all zero except slot 7 = %a[0] and slot 15 = %a[8]; the
        ; checks expect byte shifts (vpslldq).
   1336 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1337 ; AVX1:       # BB#0:
   1338 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1339 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1340 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1341 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1342 ; AVX1-NEXT:    retq
   1343 ;
   1344 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1345 ; AVX2:       # BB#0:
   1346 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
   1347 ; AVX2-NEXT:    retq
   1348   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
   1349   ret <16 x i16> %shuffle
   1350 }
   1351 
   1352 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
        ; The first shuffle operand is zeroinitializer and %a is the second. Each
        ; 8-element half becomes %a's matching half moved down one slot
        ; (%a[1..7] / %a[9..15]) with zero in its last slot; the checks expect
        ; byte shifts (vpsrldq).
   1353 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1354 ; AVX1:       # BB#0:
   1355 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1356 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1357 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1358 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1359 ; AVX1-NEXT:    retq
   1360 ;
   1361 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1362 ; AVX2:       # BB#0:
   1363 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
   1364 ; AVX2-NEXT:    retq
   1365   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
   1366   ret <16 x i16> %shuffle
   1367 }
   1368 
   1369 ;
   1370 ; Shuffle to logical bit shifts
   1371 ;
   1372 
   1373 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
        ; %a is the first operand and zeroinitializer the second, so mask index 16
        ; yields zero ("zz"). Even result slots are zero and each odd slot takes
        ; the even %a element just below it; the checks expect a 32-bit left shift
        ; by 16 (vpslld).
   1374 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1375 ; AVX1:       # BB#0:
   1376 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm1
   1377 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1378 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm0
   1379 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1380 ; AVX1-NEXT:    retq
   1381 ;
   1382 ; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1383 ; AVX2:       # BB#0:
   1384 ; AVX2-NEXT:    vpslld $16, %ymm0, %ymm0
   1385 ; AVX2-NEXT:    retq
   1386   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
   1387   ret <16 x i16> %shuffle
   1388 }
   1389 
   1390 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
        ; %a is the first operand and zeroinitializer the second (mask index 16 is
        ; zero). In each group of four slots the first three are zero and the last
        ; takes the group's first %a element (0, 4, 8, 12); the checks expect a
        ; 64-bit left shift by 48 (vpsllq).
   1391 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1392 ; AVX1:       # BB#0:
   1393 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm1
   1394 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1395 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
   1396 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1397 ; AVX1-NEXT:    retq
   1398 ;
   1399 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1400 ; AVX2:       # BB#0:
   1401 ; AVX2-NEXT:    vpsllq $48, %ymm0, %ymm0
   1402 ; AVX2-NEXT:    retq
   1403   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
   1404   ret <16 x i16> %shuffle
   1405 }
   1406 
   1407 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
        ; %a is the first operand and zeroinitializer the second (mask index 16 is
        ; zero). Each even result slot takes the odd %a element just above it and
        ; odd slots are zero; the checks expect a 32-bit right shift by 16 (vpsrld).
   1408 ; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1409 ; AVX1:       # BB#0:
   1410 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
   1411 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1412 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
   1413 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1414 ; AVX1-NEXT:    retq
   1415 ;
   1416 ; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1417 ; AVX2:       # BB#0:
   1418 ; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
   1419 ; AVX2-NEXT:    retq
   1420   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
   1421   ret <16 x i16> %shuffle
   1422 }
   1423 
   1424 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
        ; %a is the first operand and zeroinitializer the second (mask index 16 is
        ; zero). In each group of four slots the first two take the group's last
        ; two %a elements and the other two are zero. The AVX2 checks expect a
        ; 64-bit right shift by 32 (vpsrlq); the AVX1 checks expect vshufps against
        ; a zeroed register.
   1425 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1426 ; AVX1:       # BB#0:
   1427 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
   1428 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   1429 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1430 ; AVX1-NEXT:    retq
   1431 ;
   1432 ; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1433 ; AVX2:       # BB#0:
   1434 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
   1435 ; AVX2-NEXT:    retq
   1436   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
   1437   ret <16 x i16> %shuffle
   1438 }
   1439 
   1440 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
        ; The first shuffle operand is zeroinitializer and %a is the second, so
        ; mask index 0 is zero and indices 16..19 are %a[0..3]. Slots 0, 4, 8 and 12
        ; take %a[0..3] and every other slot is zero; the checks expect a
        ; word-to-qword zero extension (vpmovzxwq).
   1441 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1442 ; AVX1:       # BB#0:
   1443 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1444 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1445 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1446 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1447 ; AVX1-NEXT:    retq
   1448 ;
   1449 ; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1450 ; AVX2:       # BB#0:
   1451 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1452 ; AVX2-NEXT:    retq
   1453   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
   1454   ret <16 x i16> %shuffle
   1455 }
   1456 
   1457 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz(<16 x i16> %a) {
        ; The first shuffle operand is zeroinitializer and %a is the second, so
        ; mask index 0 is zero and indices 16..23 are %a[0..7]. Even result slots
        ; take %a[0..7] in order and odd slots are zero; the checks expect a
        ; word-to-dword zero extension (vpmovzxwd on AVX2).
        ; The function name encodes the mask, so its final pair is 23_zz to match
        ; the mask's last two indices (23, 0).
   1458 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz:
   1459 ; AVX1:       # BB#0:
   1460 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1461 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1462 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1463 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1464 ; AVX1-NEXT:    retq
   1465 ;
   1466 ; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz:
   1467 ; AVX2:       # BB#0:
   1468 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1469 ; AVX2-NEXT:    retq
   1470   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
   1471   ret <16 x i16> %shuffle
   1472 }
   1473 
   1474 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle, same pattern in each 8-element half: the first slot
        ; takes the last element of %b's matching half (%b[7] / %b[15], mask
        ; indices 23 / 31) and the rest are %a[0..6] / %a[8..14]. The checks
        ; expect vpalignr.
   1475 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   1476 ; AVX1:       # BB#0:
   1477 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1478 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1479 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1480 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1481 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1482 ; AVX1-NEXT:    retq
   1483 ;
   1484 ; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   1485 ; AVX2:       # BB#0:
   1486 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1487 ; AVX2-NEXT:    retq
   1488   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
   1489   ret <16 x i16> %shuffle
   1490 }
   1491 
   1492 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle, same pattern in each 8-element half: %a[1..7] /
        ; %a[9..15] fill the first seven slots and the last slot takes the first
        ; element of %b's matching half (%b[0] / %b[8], mask indices 16 / 24).
        ; The checks expect vpalignr.
   1493 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   1494 ; AVX1:       # BB#0:
   1495 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1496 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1497 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   1498 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
   1499 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1500 ; AVX1-NEXT:    retq
   1501 ;
   1502 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   1503 ; AVX2:       # BB#0:
   1504 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
   1505 ; AVX2-NEXT:    retq
   1506   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
   1507   ret <16 x i16> %shuffle
   1508 }
   1509 
   1510 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle (mask indices 16-31 select from %b): each 128-bit lane
        ; takes words 1-7 of %b followed by word 0 of the matching lane of %a,
        ; i.e. the operand-swapped form of a per-lane rotate across two inputs.
        ; NOTE(review): the trailing `_8` in the name breaks the two-digit index
        ; convention used by the other test names in this file; it is kept as-is
        ; so the test is not renamed.
   1511 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
   1512 ; AVX1:       # BB#0:
   1513 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1514 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1515 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   1516 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
   1517 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1518 ; AVX1-NEXT:    retq
   1519 ;
   1520 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
   1521 ; AVX2:       # BB#0:
   1522 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
   1523 ; AVX2-NEXT:    retq
   1524   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
   1525   ret <16 x i16> %shuffle
   1526 }
   1527 
   1528 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle (mask indices 16-31 select from %b): each 128-bit lane
        ; takes word 7 of %a followed by words 0-6 of the matching lane of %b.
   1529 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   1530 ; AVX1:       # BB#0:
   1531 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1532 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1533 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1534 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1535 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1536 ; AVX1-NEXT:    retq
   1537 ;
   1538 ; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   1539 ; AVX2:       # BB#0:
   1540 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1541 ; AVX2-NEXT:    retq
   1542   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
   1543   ret <16 x i16> %shuffle
   1544 }
   1545 
   1546 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
        ; The low result lane is the low eight words of %a rotated to 1..7,0; the
        ; high result lane is the low eight words of %b (mask indices 17-23,16)
        ; rotated the same way. With AVX2 the checks expect vinserti128 to pair
        ; the two low halves, then one in-lane vpalignr.
   1547 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   1548 ; AVX1:       # BB#0:
   1549 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   1550 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   1551 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1552 ; AVX1-NEXT:    retq
   1553 ;
   1554 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   1555 ; AVX2:       # BB#0:
   1556 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1557 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
   1558 ; AVX2-NEXT:    retq
   1559   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
   1560   ret <16 x i16> %shuffle
   1561 }
   1562 
   1563 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
        ; The low result lane is the low eight words of %a rotated to 7,0..6; the
        ; high result lane is the low eight words of %b (mask indices 23,16-22)
        ; rotated the same way. With AVX2 the checks expect vinserti128 to pair
        ; the two low halves, then one in-lane vpalignr.
   1564 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   1565 ; AVX1:       # BB#0:
   1566 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1567 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1568 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1569 ; AVX1-NEXT:    retq
   1570 ;
   1571 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   1572 ; AVX2:       # BB#0:
   1573 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1574 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   1575 ; AVX2-NEXT:    retq
   1576   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
   1577   ret <16 x i16> %shuffle
   1578 }
   1579 
   1580 define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 0,1,0,1,2,3,2,3, except result
        ; element 7, which reads word 11 (high lane) instead of word 3.
   1581 ; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   1582 ; AVX1:       # BB#0:
   1583 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1584 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1585 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
   1586 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
   1587 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1588 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1589 ; AVX1-NEXT:    retq
   1590 ;
   1591 ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   1592 ; AVX2:       # BB#0:
   1593 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1594 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1595 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
   1596 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
   1597 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1598 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1599 ; AVX2-NEXT:    retq
   1600   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
   1601   ret <16 x i16> %shuffle
   1602 }
   1603 
   1604 define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 6,7,4,5,2,3,0,1, except result
        ; element 7, which reads word 9 (high lane) instead of word 1.
   1605 ; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   1606 ; AVX1:       # BB#0:
   1607 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1608 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   1609 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1610 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
   1611 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1612 ; AVX1-NEXT:    retq
   1613 ;
   1614 ; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   1615 ; AVX2:       # BB#0:
   1616 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1617 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   1618 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1619 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
   1620 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1621 ; AVX2-NEXT:    retq
   1622   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
   1623   ret <16 x i16> %shuffle
   1624 }
   1625 
   1626 define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle (mask indices 16-31 select from %b). The high lane is
        ; words 4-7 of %a's high lane followed by words 0-3 of %b's high lane.
        ; The low lane follows the same pattern except result element 7, which
        ; reads %b word 11 (mask index 27) instead of %b word 3.
   1627 ; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   1628 ; AVX1:       # BB#0:
   1629 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1630 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1631 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
   1632 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   1633 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
   1634 ; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
   1635 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1636 ; AVX1-NEXT:    retq
   1637 ;
   1638 ; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   1639 ; AVX2:       # BB#0:
   1640 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1641 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1642 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   1643 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1644 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   1645 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1646 ; AVX2-NEXT:    retq
   1647   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
   1648   ret <16 x i16> %shuffle
   1649 }
   1650 
   1651 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Each lane
        ; splats its own word 0, except result element 7 (last slot of the low
        ; lane), which reads word 8 (high lane) instead of word 0.
   1652 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   1653 ; AVX1:       # BB#0:
   1654 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1655 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1656 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
   1657 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1658 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1659 ; AVX1-NEXT:    retq
   1660 ;
   1661 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   1662 ; AVX2:       # BB#0:
   1663 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1664 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1665 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
   1666 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   1667 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1668 ; AVX2-NEXT:    retq
   1669   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1670   ret <16 x i16> %shuffle
   1671 }
   1672 
   1673 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 0,0,0,0,4,4,4,4, except result
        ; element 7, which reads word 12 (high lane) instead of word 4.
   1674 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   1675 ; AVX1:       # BB#0:
   1676 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1677 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1678 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1679 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
   1680 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1681 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1682 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1683 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1684 ; AVX1-NEXT:    retq
   1685 ;
   1686 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   1687 ; AVX2:       # BB#0:
   1688 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1689 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   1690 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1691 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
   1692 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1693 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1694 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1695 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1696 ; AVX2-NEXT:    retq
   1697   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   1698   ret <16 x i16> %shuffle
   1699 }
   1700 
   1701 define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a with every even result element undef ("uu").
        ; The odd elements use the lane-relative pattern 0,1,2,3 in both lanes,
        ; except result element 7, which reads word 11 (high lane) instead of 3.
   1702 ; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   1703 ; AVX1:       # BB#0:
   1704 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1705 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1706 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1707 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1708 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1709 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1710 ; AVX1-NEXT:    retq
   1711 ;
   1712 ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   1713 ; AVX2:       # BB#0:
   1714 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1715 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1716 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1717 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1718 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1719 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1720 ; AVX2-NEXT:    retq
   1721   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
   1722   ret <16 x i16> %shuffle
   1723 }
   1724 
   1725 define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a with every even result element undef ("uu").
        ; The odd elements use the lane-relative pattern 4,5,6,7 in both lanes,
        ; except result element 7, which reads word 15 (high lane) instead of 7.
   1726 ; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   1727 ; AVX1:       # BB#0:
   1728 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1729 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1730 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1731 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1732 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1733 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1734 ; AVX1-NEXT:    retq
   1735 ;
   1736 ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   1737 ; AVX2:       # BB#0:
   1738 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1739 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1740 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1741 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   1742 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   1743 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1744 ; AVX2-NEXT:    retq
   1745   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
   1746   ret <16 x i16> %shuffle
   1747 }
   1748 
   1749 define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 3,1,2,0,6,7,4,5, except result
        ; element 7, which reads word 13 (high lane) instead of word 5.
   1750 ; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   1751 ; AVX1:       # BB#0:
   1752 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1753 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1754 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   1755 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1756 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
   1757 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1758 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1759 ; AVX1-NEXT:    retq
   1760 ;
   1761 ; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   1762 ; AVX2:       # BB#0:
   1763 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1764 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1765 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   1766 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1767 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
   1768 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1769 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1770 ; AVX2-NEXT:    retq
   1771   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
   1772   ret <16 x i16> %shuffle
   1773 }
   1774 
   1775 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 4,4,4,4,0,0,0,0, except result
        ; element 7, which reads word 8 (high lane) instead of word 0.
   1776 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   1777 ; AVX1:       # BB#0:
   1778 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1779 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1780 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
   1781 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1782 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1783 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1784 ; AVX1-NEXT:    retq
   1785 ;
   1786 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   1787 ; AVX2:       # BB#0:
   1788 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1789 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1790 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
   1791 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1792 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1793 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1794 ; AVX2-NEXT:    retq
   1795   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
   1796   ret <16 x i16> %shuffle
   1797 }
   1798 
   1799 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 2,3,0,1,6,7,4,5, except result
        ; element 7, which reads word 13 (high lane) instead of word 5.
   1800 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   1801 ; AVX1:       # BB#0:
   1802 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1803 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1804 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1805 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
   1806 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1807 ; AVX1-NEXT:    retq
   1808 ;
   1809 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   1810 ; AVX2:       # BB#0:
   1811 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1812 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1813 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1814 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
   1815 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1816 ; AVX2-NEXT:    retq
   1817   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
   1818   ret <16 x i16> %shuffle
   1819 }
   1820 
   1821 define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 2,3,0,2,6,7,4,5, except result
        ; element 7, which reads word 13 (high lane) instead of word 5.
   1822 ; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   1823 ; AVX1:       # BB#0:
   1824 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1825 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1826 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
   1827 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1828 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
   1829 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1830 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1831 ; AVX1-NEXT:    retq
   1832 ;
   1833 ; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   1834 ; AVX2:       # BB#0:
   1835 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1836 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   1837 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
   1838 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   1839 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
   1840 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   1841 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1842 ; AVX2-NEXT:    retq
   1843   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
   1844   ret <16 x i16> %shuffle
   1845 }
   1846 
   1847 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 2,3,0,1,6,7,4,7, except result
        ; element 7, which reads word 15 (high lane) instead of word 7.
   1848 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   1849 ; AVX1:       # BB#0:
   1850 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1851 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1852 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1853 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
   1854 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   1855 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1856 ; AVX1-NEXT:    retq
   1857 ;
   1858 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   1859 ; AVX2:       # BB#0:
   1860 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1861 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   1862 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   1863 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
   1864 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   1865 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1866 ; AVX2-NEXT:    retq
   1867   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
   1868   ret <16 x i16> %shuffle
   1869 }
   1870 
   1871 define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 7,5,6,4,3,1,2,0, except result
        ; element 7, which reads word 8 (high lane) instead of word 0. The checks
        ; expect one shared vpshufb control vector applied to both halves.
   1872 ; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   1873 ; AVX1:       # BB#0:
   1874 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1875 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
   1876 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   1877 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
   1878 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1879 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   1880 ; AVX1-NEXT:    retq
   1881 ;
   1882 ; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   1883 ; AVX2:       # BB#0:
   1884 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1885 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
   1886 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   1887 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
   1888 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1889 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   1890 ; AVX2-NEXT:    retq
   1891   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
   1892   ret <16 x i16> %shuffle
   1893 }
   1894 
   1895 define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 1,0,5,4,5,4,1,0, except result
        ; element 7, which reads word 8 (high lane) instead of word 0.
   1896 ; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   1897 ; AVX1:       # BB#0:
   1898 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1899 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1900 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
   1901 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1902 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
   1903 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1904 ; AVX1-NEXT:    retq
   1905 ;
   1906 ; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   1907 ; AVX2:       # BB#0:
   1908 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1909 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1910 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
   1911 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1912 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
   1913 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1914 ; AVX2-NEXT:    retq
   1915   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
   1916   ret <16 x i16> %shuffle
   1917 }
   1918 
   1919 define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 5,4,1,0,5,4,1,0, except result
        ; element 7, which reads word 8 (high lane) instead of word 0.
   1920 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   1921 ; AVX1:       # BB#0:
   1922 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1923 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1924 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
   1925 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1926 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
   1927 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1928 ; AVX1-NEXT:    retq
   1929 ;
   1930 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   1931 ; AVX2:       # BB#0:
   1932 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1933 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1934 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
   1935 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1936 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
   1937 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1938 ; AVX2-NEXT:    retq
   1939   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
   1940   ret <16 x i16> %shuffle
   1941 }
   1942 
   1943 define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 5,4,1,0,1,0,5,4, except result
        ; element 7, which reads word 12 (high lane) instead of word 4.
   1944 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   1945 ; AVX1:       # BB#0:
   1946 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1947 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1948 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
   1949 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1950 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
   1951 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1952 ; AVX1-NEXT:    retq
   1953 ;
   1954 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   1955 ; AVX2:       # BB#0:
   1956 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1957 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   1958 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
   1959 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1960 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
   1961 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1962 ; AVX2-NEXT:    retq
   1963   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
   1964   ret <16 x i16> %shuffle
   1965 }
   1966 
   1967 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 0,4,4,0,0,4,4,0, except result
        ; element 7, which reads word 8 (high lane) instead of word 0.
   1968 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   1969 ; AVX1:       # BB#0:
   1970 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1971 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   1972 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
   1973 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1974 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
   1975 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1976 ; AVX1-NEXT:    retq
   1977 ;
   1978 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   1979 ; AVX2:       # BB#0:
   1980 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1981 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   1982 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
   1983 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1984 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
   1985 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1986 ; AVX2-NEXT:    retq
   1987   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
   1988   ret <16 x i16> %shuffle
   1989 }
   1990 
   1991 define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle of %a (no mask index selects from %b). Both 128-bit
        ; lanes use the lane-relative pattern 4,0,0,4,4,0,0,4, except result
        ; element 7, which reads word 12 (high lane) instead of word 4.
   1992 ; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   1993 ; AVX1:       # BB#0:
   1994 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1995 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   1996 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
   1997 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   1998 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
   1999 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2000 ; AVX1-NEXT:    retq
   2001 ;
   2002 ; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   2003 ; AVX2:       # BB#0:
   2004 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2005 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2006 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
   2007 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2008 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
   2009 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2010 ; AVX2-NEXT:    retq
   2011   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
   2012   ret <16 x i16> %shuffle
   2013 }
   2014 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <2,6,4,0,5,1,7,3>, except
   ; result element 7, which reads element 11 from the upper half of %a.
   2015 define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2016 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2017 ; AVX1:       # BB#0:
   2018 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2019 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
   2020 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2021 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2022 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2023 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2024 ; AVX1-NEXT:    retq
   2025 ;
   2026 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2027 ; AVX2:       # BB#0:
   2028 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2029 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
   2030 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2031 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2032 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2033 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2034 ; AVX2-NEXT:    retq
   2035   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
   2036   ret <16 x i16> %shuffle
   2037 }
   2038 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <2,0,6,4,5,1,7,3>, except
   ; result element 7, which reads element 11 from the upper half of %a.
   2039 define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2040 ; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2041 ; AVX1:       # BB#0:
   2042 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2043 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
   2044 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2045 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2046 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2047 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2048 ; AVX1-NEXT:    retq
   2049 ;
   2050 ; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2051 ; AVX2:       # BB#0:
   2052 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2053 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
   2054 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2055 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2056 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2057 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2058 ; AVX2-NEXT:    retq
   2059   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
   2060   ret <16 x i16> %shuffle
   2061 }
   2062 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <2,6,4,0,1,3,7,5>, except
   ; result element 7, which reads element 13 from the upper half of %a.
   2063 define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
   2064 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2065 ; AVX1:       # BB#0:
   2066 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2067 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
   2068 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2069 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2070 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2071 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2072 ; AVX1-NEXT:    retq
   2073 ;
   2074 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2075 ; AVX2:       # BB#0:
   2076 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2077 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
   2078 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2079 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2080 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2081 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2082 ; AVX2-NEXT:    retq
   2083   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
   2084   ret <16 x i16> %shuffle
   2085 }
   2086 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <6,6,7,5,1,6,4,3>, except
   ; result element 7, which reads element 11 from the upper half of %a.
   2087 define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
   2088 ; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2089 ; AVX1:       # BB#0:
   2090 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2091 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
   2092 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2093 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
   2094 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2095 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2096 ; AVX1-NEXT:    retq
   2097 ;
   2098 ; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2099 ; AVX2:       # BB#0:
   2100 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2101 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
   2102 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2103 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
   2104 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2105 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2106 ; AVX2-NEXT:    retq
   2107   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
   2108   ret <16 x i16> %shuffle
   2109 }
   2110 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <0,0,4,4,4,4,4,4>, except
   ; result element 7, which reads element 12 from the upper half of %a.
   2111 define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2112 ; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2113 ; AVX1:       # BB#0:
   2114 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2115 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2116 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
   2117 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2118 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
   2119 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2120 ; AVX1-NEXT:    retq
   2121 ;
   2122 ; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2123 ; AVX2:       # BB#0:
   2124 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2125 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2126 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
   2127 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2128 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
   2129 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2130 ; AVX2-NEXT:    retq
   2131   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2132   ret <16 x i16> %shuffle
   2133 }
   2134 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <4,4,0,0,4,4,4,4>, except
   ; result element 7, which reads element 12 from the upper half of %a.
   2135 define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2136 ; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2137 ; AVX1:       # BB#0:
   2138 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2139 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2140 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
   2141 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2142 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
   2143 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2144 ; AVX1-NEXT:    retq
   2145 ;
   2146 ; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2147 ; AVX2:       # BB#0:
   2148 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2149 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2150 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
   2151 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2152 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
   2153 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2154 ; AVX2-NEXT:    retq
   2155   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   2156   ret <16 x i16> %shuffle
   2157 }
   2158 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <0,4,4,0,4,4,4,4>, except
   ; result element 7, which reads element 12 from the upper half of %a.
   2159 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2160 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2161 ; AVX1:       # BB#0:
   2162 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2163 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2164 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2165 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2166 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2167 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2168 ; AVX1-NEXT:    retq
   2169 ;
   2170 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2171 ; AVX2:       # BB#0:
   2172 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2173 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2174 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2175 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2176 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2177 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2178 ; AVX2-NEXT:    retq
   2179   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   2180   ret <16 x i16> %shuffle
   2181 }
   2182 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <0,4,4,0,0,0,0,0>, except
   ; result element 7, which reads element 8 from the upper half of %a.
   2183 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   2184 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2185 ; AVX1:       # BB#0:
   2186 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2187 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2188 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
   2189 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2190 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
   2191 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2192 ; AVX1-NEXT:    retq
   2193 ;
   2194 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2195 ; AVX2:       # BB#0:
   2196 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2197 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
   2198 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
   2199 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2200 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
   2201 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2202 ; AVX2-NEXT:    retq
   2203   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
   2204   ret <16 x i16> %shuffle
   2205 }
   2206 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <0,4,4,0,4,5,6,7>, except
   ; result element 7, which reads element 15 from the upper half of %a.
   2207 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
   2208 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2209 ; AVX1:       # BB#0:
   2210 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2211 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
   2212 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
   2213 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2214 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
   2215 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
   2216 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2217 ; AVX1-NEXT:    retq
   2218 ;
   2219 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2220 ; AVX2:       # BB#0:
   2221 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2222 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
   2223 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
   2224 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2225 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
   2226 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
   2227 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2228 ; AVX2-NEXT:    retq
   2229   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
   2230   ret <16 x i16> %shuffle
   2231 }
   2232 
   ; Single-input shuffle (every defined mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <0,undef,4,4,4,4,4,4>
   ; ("uu" in the name marks the undef mask slots 1 and 9), except result
   ; element 7, which reads element 12 from the upper half of %a.
   2233 define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2234 ; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2235 ; AVX1:       # BB#0:
   2236 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2237 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2238 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
   2239 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2240 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
   2241 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2242 ; AVX1-NEXT:    retq
   2243 ;
   2244 ; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2245 ; AVX2:       # BB#0:
   2246 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2247 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2248 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
   2249 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2250 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
   2251 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2252 ; AVX2-NEXT:    retq
   2253   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2254   ret <16 x i16> %shuffle
   2255 }
   2256 
   ; Single-input shuffle (every defined mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <4,4,undef,0,4,4,4,4>
   ; ("uu" in the name marks the undef mask slots 2 and 10), except result
   ; element 7, which reads element 12 from the upper half of %a.
   2257 define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2258 ; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2259 ; AVX1:       # BB#0:
   2260 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2261 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2262 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2263 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2264 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2265 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2266 ; AVX1-NEXT:    retq
   2267 ;
   2268 ; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2269 ; AVX2:       # BB#0:
   2270 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2271 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2272 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2273 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2274 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2275 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2276 ; AVX2-NEXT:    retq
   2277   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
   2278   ret <16 x i16> %shuffle
   2279 }
   2280 
   ; Single-input shuffle (every defined mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <undef,4,4,0,4,4,4,4>
   ; ("uu" in the name marks the undef mask slots 0 and 8), except result
   ; element 7, which reads element 12 from the upper half of %a.
   2281 define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2282 ; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   2283 ; AVX1:       # BB#0:
   2284 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2285 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2286 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2287 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2288 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2289 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2290 ; AVX1-NEXT:    retq
   2291 ;
   2292 ; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   2293 ; AVX2:       # BB#0:
   2294 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2295 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
   2296 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2297 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2298 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2299 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2300 ; AVX2-NEXT:    retq
   2301   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   2302   ret <16 x i16> %shuffle
   2303 }
   2304 
   ; Single-input shuffle (every defined mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the same in-lane word pattern <0,1,2,7> for their
   ; first four elements; the last four elements of each half are undef ("uu").
   ; No element crosses between the two halves.
   2305 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2306 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   2307 ; AVX1:       # BB#0:
   2308 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2309 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
   2310 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2311 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2312 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2313 ; AVX1-NEXT:    retq
   2314 ;
   2315 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   2316 ; AVX2:       # BB#0:
   2317 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
   2318 ; AVX2-NEXT:    retq
   2319   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
   2320   ret <16 x i16> %shuffle
   2321 }
   2322 
   ; Single-input shuffle (every defined mask index is < 16, so only %a is read).
   ; The first four elements of each 128-bit half are undef ("uu"); the last four
   ; use the in-lane word pattern <4,5,6,3>, except result element 7, which
   ; reads element 11 from the upper half of %a.
   2323 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   2324 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   2325 ; AVX1:       # BB#0:
   2326 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2327 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
   2328 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2329 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
   2330 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2331 ; AVX1-NEXT:    retq
   2332 ;
   2333 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   2334 ; AVX2:       # BB#0:
   2335 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2336 ; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm2
   2337 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2338 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
   2339 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2340 ; AVX2-NEXT:    retq
   2341   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
   2342   ret <16 x i16> %shuffle
   2343 }
   2344 
   ; Single-input shuffle (every defined mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the same in-lane word pattern <4,5,6,3> for their
   ; first four elements; the last four elements of each half are undef ("uu").
   ; No element crosses between the two halves.
   2345 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   2346 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   2347 ; AVX1:       # BB#0:
   2348 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2349 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
   2350 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2351 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2352 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2353 ; AVX1-NEXT:    retq
   2354 ;
   2355 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   2356 ; AVX2:       # BB#0:
   2357 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
   2358 ; AVX2-NEXT:    retq
   2359   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   2360   ret <16 x i16> %shuffle
   2361 }
   2362 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <0,1,2,7,4,5,6,3>, except
   ; result element 7, which reads element 11 from the upper half of %a.
   2363 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   2364 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   2365 ; AVX1:       # BB#0:
   2366 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2367 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
   2368 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2369 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2370 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2371 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2372 ; AVX1-NEXT:    retq
   2373 ;
   2374 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   2375 ; AVX2:       # BB#0:
   2376 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2377 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
   2378 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2379 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2380 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2381 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2382 ; AVX2-NEXT:    retq
   2383   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
   2384   ret <16 x i16> %shuffle
   2385 }
   2386 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <4,5,6,3,0,1,2,7>, except
   ; result element 7, which reads element 15 from the upper half of %a.
   2387 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
   2388 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   2389 ; AVX1:       # BB#0:
   2390 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2391 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
   2392 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2393 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
   2394 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2395 ; AVX1-NEXT:    retq
   2396 ;
   2397 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   2398 ; AVX2:       # BB#0:
   2399 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2400 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
   2401 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2402 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
   2403 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2404 ; AVX2-NEXT:    retq
   2405   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
   2406   ret <16 x i16> %shuffle
   2407 }
   2408 
   ; Single-input shuffle (every mask index is < 16, so only %a is read).
   ; Both 128-bit halves use the in-lane word pattern <3,7,1,0,2,7,3,5>, except
   ; result element 7, which reads element 13 from the upper half of %a.
   2409 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
   2410 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   2411 ; AVX1:       # BB#0:
   2412 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2413 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
   2414 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2415 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
   2416 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2417 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2418 ; AVX1-NEXT:    retq
   2419 ;
   2420 ; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   2421 ; AVX2:       # BB#0:
   2422 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2423 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
   2424 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2425 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
   2426 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2427 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2428 ; AVX2-NEXT:    retq
   2429   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
   2430   ret <16 x i16> %shuffle
   2431 }
   2432 
   ; Two-input shuffle (mask indices 0-15 select from %a, 16-31 from %b).
   ; Each 128-bit half interleaves its first four words of %a with the first
   ; four words of the matching half of %b, except result element 7, which
   ; reads index 27 (element 11 of %b, upper half) instead of index 19.
   2433 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
   2434 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   2435 ; AVX1:       # BB#0:
   2436 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2437 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2438 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2439 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2440 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2441 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2442 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2443 ; AVX1-NEXT:    retq
   2444 ;
   2445 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   2446 ; AVX2:       # BB#0:
   2447 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2448 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2449 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2450 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
   2451 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
   2452 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
   2453 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
   2454 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2455 ; AVX2-NEXT:    retq
   2456   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   2457   ret <16 x i16> %shuffle
   2458 }
   2459 
   ; Two-input shuffle (mask indices 0-15 select from %a, 16-31 from %b).
   ; Each 128-bit half interleaves its first four words of %a with the last
   ; four words of the matching half of %b, except result element 7, which
   ; reads index 31 (element 15 of %b, upper half) instead of index 23.
   2460 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
   2461 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   2462 ; AVX1:       # BB#0:
   2463 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2464 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2465 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
   2466 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
   2467 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
   2468 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2469 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2470 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2471 ; AVX1-NEXT:    retq
   2472 ;
   2473 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   2474 ; AVX2:       # BB#0:
   2475 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2476 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
   2477 ; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm2
   2478 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2479 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
   2480 ; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   2481 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
   2482 ; AVX2-NEXT:    retq
   2483   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
   2484   ret <16 x i16> %shuffle
   2485 }
   2486 
   ; Two-input shuffle (mask indices 0-15 select from %a, 16-31 from %b).
   ; Each 128-bit half interleaves its last four words of %a with the last
   ; four words of the matching half of %b, except result element 7, which
   ; reads index 31 (element 15 of %b, upper half) instead of index 23.
   2487 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   2488 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   2489 ; AVX1:       # BB#0:
   2490 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2491 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2492 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2493 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2494 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2495 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2496 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2497 ; AVX1-NEXT:    retq
   2498 ;
   2499 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   2500 ; AVX2:       # BB#0:
   2501 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2502 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2503 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2504 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
   2505 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
   2506 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
   2507 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
   2508 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2509 ; AVX2-NEXT:    retq
   2510   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   2511   ret <16 x i16> %shuffle
   2512 }
   2513 
   2514 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane interleaves
        ; the high four words of %a with the low four words of %b. The one deviation is
        ; result element 7, which uses index 27 (upper lane of %b) where a lane-repeated
        ; mask would use 19.
   2515 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   2516 ; AVX1:       # BB#0:
   2517 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2518 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2519 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   2520 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2521 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2522 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2523 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2524 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2525 ; AVX1-NEXT:    retq
   2526 ;
   2527 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   2528 ; AVX2:       # BB#0:
   2529 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   2530 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2531 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2532 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
   2533 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2534 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2535 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2536 ; AVX2-NEXT:    retq
   2537   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
   2538   ret <16 x i16> %shuffle
   2539 }
   2540 
   2541 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane interleaves
        ; lane-relative words 0,1,6,7 of %a with words 0,1,6,7 of %b. The one deviation
        ; is result element 7, which uses index 31 (upper lane of %b) where a
        ; lane-repeated mask would use 23.
   2542 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   2543 ; AVX1:       # BB#0:
   2544 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2545 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
   2546 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2547 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   2548 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   2549 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
   2550 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
   2551 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
   2552 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
   2553 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   2554 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2555 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2556 ; AVX1-NEXT:    retq
   2557 ;
   2558 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   2559 ; AVX2:       # BB#0:
   2560 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2561 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
   2562 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
   2563 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
   2564 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
   2565 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
   2566 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2567 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   2568 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   2569 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2570 ; AVX2-NEXT:    retq
   2571   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
   2572   ret <16 x i16> %shuffle
   2573 }
   2574 
   2575 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane interleaves
        ; lane-relative words 0,1,6,7 of %a with words 4,5,0,1 of %b. The one deviation
        ; is result element 7, which uses index 25 (upper lane of %b) where a
        ; lane-repeated mask would use 17.
   2576 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   2577 ; AVX1:       # BB#0:
   2578 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2579 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
   2580 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2581 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   2582 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   2583 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
   2584 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
   2585 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   2586 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2587 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2588 ; AVX1-NEXT:    retq
   2589 ;
   2590 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   2591 ; AVX2:       # BB#0:
   2592 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2593 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3]
   2594 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
   2595 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
   2596 ; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
   2597 ; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm1
   2598 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   2599 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   2600 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   2601 ; AVX2-NEXT:    retq
   2602   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
   2603   ret <16 x i16> %shuffle
   2604 }
   2605 
   2606 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane alternates
        ; word-swapped pairs from %a and %b (lane-relative a1,a0,b1,b0,a3,a2,b3,b2).
        ; The one deviation is result element 7, which uses index 26 (upper lane of %b)
        ; where a lane-repeated mask would use 18.
   2607 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   2608 ; AVX1:       # BB#0:
   2609 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2610 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2611 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
   2612 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
   2613 ; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
   2614 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2615 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2616 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   2617 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   2618 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   2619 ; AVX1-NEXT:    retq
   2620 ;
   2621 ; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   2622 ; AVX2:       # BB#0:
   2623 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
   2624 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   2625 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
   2626 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
   2627 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
   2628 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2629 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
   2630 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   2631 ; AVX2-NEXT:    retq
   2632   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
   2633   ret <16 x i16> %shuffle
   2634 }
   2635 
   2636 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane interleaves
        ; the low four words of %b with the low four words of %a (unpcklwd with %b as
        ; the first operand). The one deviation is result element 7, which uses index 11
        ; (upper lane of %a) where a lane-repeated mask would use 3.
   2637 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   2638 ; AVX1:       # BB#0:
   2639 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2640 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2641 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   2642 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2643 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   2644 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   2645 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2646 ; AVX1-NEXT:    retq
   2647 ;
   2648 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   2649 ; AVX2:       # BB#0:
   2650 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
   2651 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2652 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2653 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2654 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2655 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2656 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
   2657 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
   2658 ; AVX2-NEXT:    retq
   2659   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
   2660   ret <16 x i16> %shuffle
   2661 }
   2662 
   2663 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane interleaves
        ; the high four words of %b with the high four words of %a (unpckhwd with %b as
        ; the first operand). The one deviation is result element 7, which uses index 15
        ; (upper lane of %a) where a lane-repeated mask would use 7.
   2664 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   2665 ; AVX1:       # BB#0:
   2666 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2667 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2668 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2669 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2670 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   2671 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   2672 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2673 ; AVX1-NEXT:    retq
   2674 ;
   2675 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   2676 ; AVX2:       # BB#0:
   2677 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
   2678 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2679 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2680 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2681 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2682 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
   2683 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
   2684 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
   2685 ; AVX2-NEXT:    retq
   2686   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
   2687   ret <16 x i16> %shuffle
   2688 }
   2689 
   2690 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. In each 128-bit lane the low
        ; four result words are %a's low four words permuted 0,2,1,3 and the high four
        ; are %b's high four words permuted the same way. The one deviation is result
        ; element 7, which uses index 31 (upper lane of %b) where a lane-repeated mask
        ; would use 23.
   2691 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   2692 ; AVX1:       # BB#0:
   2693 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2694 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
   2695 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2696 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
   2697 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
   2698 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
   2699 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
   2700 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
   2701 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   2702 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   2703 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2704 ; AVX1-NEXT:    retq
   2705 ;
   2706 ; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   2707 ; AVX2:       # BB#0:
   2708 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2709 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2710 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   2711 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
   2712 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2713 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
   2714 ; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
   2715 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2716 ; AVX2-NEXT:    retq
   2717   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
   2718   ret <16 x i16> %shuffle
   2719 }
   2720 
   2721 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; The mask repeats per 128-bit lane: the low four result words of each lane are
        ; lane-relative a[4], a[4], a[3], b[2]; the high four are undef.
   2722 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   2723 ; AVX1:       # BB#0:
   2724 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   2725 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
   2726 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
   2727 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2728 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2729 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   2730 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   2731 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   2732 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   2733 ; AVX1-NEXT:    retq
   2734 ;
   2735 ; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   2736 ; AVX2:       # BB#0:
   2737 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
   2738 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
   2739 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
   2740 ; AVX2-NEXT:    retq
   2741   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
   2742   ret <16 x i16> %shuffle
   2743 }
   2744 
   2745 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; The mask repeats per 128-bit lane: the low four result words of each lane are
        ; lane-relative a[0], a[3], a[2], b[5]; the high four are undef.
   2746 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   2747 ; AVX1:       # BB#0:
   2748 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2749 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2750 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
   2751 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   2752 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   2753 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   2754 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
   2755 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2756 ; AVX1-NEXT:    retq
   2757 ;
   2758 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   2759 ; AVX2:       # BB#0:
   2760 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2761 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
   2762 ; AVX2-NEXT:    retq
   2763   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2764   ret <16 x i16> %shuffle
   2765 }
   2766 
   2767 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; Only result elements 3 and 11 are defined, taking indices 21 and 29
        ; (lane-relative word 5 of %b in each 128-bit lane). Both configurations are
        ; expected to emit a single in-lane dword shuffle of %b.
   2768 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
   2769 ; AVX1:       # BB#0:
   2770 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
   2771 ; AVX1-NEXT:    retq
   2772 ;
   2773 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
   2774 ; AVX2:       # BB#0:
   2775 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
   2776 ; AVX2-NEXT:    retq
   2777   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2778   ret <16 x i16> %shuffle
   2779 }
   2780 
   2781 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; The mask repeats per 128-bit lane: the low four result words of each lane are
        ; lane-relative a[0], a[1], a[2], b[5]; the high four are undef.
   2782 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   2783 ; AVX1:       # BB#0:
   2784 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2785 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2786 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
   2787 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
   2788 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   2789 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2790 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2791 ; AVX1-NEXT:    retq
   2792 ;
   2793 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   2794 ; AVX2:       # BB#0:
   2795 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
   2796 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
   2797 ; AVX2-NEXT:    retq
   2798   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   2799   ret <16 x i16> %shuffle
   2800 }
   2801 
   2802 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; The low four result words of each 128-bit lane are undef. The high four are
        ; lane-relative b[4], b[5], b[6], followed by index 11 in BOTH lanes, so the
        ; low result lane reads a word from the upper lane of %a.
   2803 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   2804 ; AVX1:       # BB#0:
   2805 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2806 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2807 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   2808 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
   2809 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   2810 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2811 ; AVX1-NEXT:    retq
   2812 ;
   2813 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   2814 ; AVX2:       # BB#0:
   2815 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
   2816 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
   2817 ; AVX2-NEXT:    retq
   2818   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
   2819   ret <16 x i16> %shuffle
   2820 }
   2821 
   2822 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; The mask repeats per 128-bit lane: the low four result words of each lane are
        ; lane-relative b[4], b[5], b[6], a[3]; the high four are undef.
   2823 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   2824 ; AVX1:       # BB#0:
   2825 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2826 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2827 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   2828 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
   2829 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   2830 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   2831 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2832 ; AVX1-NEXT:    retq
   2833 ;
   2834 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   2835 ; AVX2:       # BB#0:
   2836 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
   2837 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
   2838 ; AVX2-NEXT:    retq
   2839   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   2840   ret <16 x i16> %shuffle
   2841 }
   2842 
   2843 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane is
        ; lane-relative a[0], a[1], a[2], b[5], b[4], b[5], b[6], a[3]. The one
        ; deviation is result element 7, which uses index 11 (upper lane of %a) where a
        ; lane-repeated mask would use 3.
   2844 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   2845 ; AVX1:       # BB#0:
   2846 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2847 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
   2848 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
   2849 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
   2850 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2851 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
   2852 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   2853 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2854 ; AVX1-NEXT:    retq
   2855 ;
   2856 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   2857 ; AVX2:       # BB#0:
   2858 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2859 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2860 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2861 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   2862 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2863 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   2864 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2865 ; AVX2-NEXT:    retq
   2866   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
   2867   ret <16 x i16> %shuffle
   2868 }
   2869 
   2870 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Every result element keeps
        ; its own position (a per-word blend of %a and %b) except result element 7,
        ; which uses index 15 (upper lane of %a) where a pure blend would use 7 or 23.
   2871 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   2872 ; AVX1:       # BB#0:
   2873 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2874 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2875 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
   2876 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   2877 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   2878 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2879 ; AVX1-NEXT:    retq
   2880 ;
   2881 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   2882 ; AVX2:       # BB#0:
   2883 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
   2884 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
   2885 ; AVX2-NEXT:    retq
   2886   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
   2887   ret <16 x i16> %shuffle
   2888 }
   2889 
   2890 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; In each 128-bit lane, result words 3, 5 and 6 take lane-relative a[1], a[5]
        ; and a[7]. Result elements 7 and 15 both take index 25, so the low result lane
        ; reads a word from the upper lane of %b.
   2891 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   2892 ; AVX1:       # BB#0:
   2893 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   2894 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   2895 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2896 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
   2897 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
   2898 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
   2899 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   2900 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   2901 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2902 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2903 ; AVX1-NEXT:    retq
   2904 ;
   2905 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   2906 ; AVX2:       # BB#0:
   2907 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
   2908 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   2909 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
   2910 ; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
   2911 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
   2912 ; AVX2-NEXT:    retq
   2913   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
   2914   ret <16 x i16> %shuffle
   2915 }
   2916 
   2917 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; The mask repeats per 128-bit lane: result word 2 is lane-relative a[4] and
        ; result words 4, 5, 6 are lane-relative b[0], b[2], b[4]; the rest are undef.
   2918 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   2919 ; AVX1:       # BB#0:
   2920 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2921 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   2922 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   2923 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2924 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
   2925 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
   2926 ; AVX1-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
   2927 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   2928 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   2929 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2930 ; AVX1-NEXT:    retq
   2931 ;
   2932 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   2933 ; AVX2:       # BB#0:
   2934 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
   2935 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
   2936 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   2937 ; AVX2-NEXT:    retq
   2938   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
   2939   ret <16 x i16> %shuffle
   2940 }
   2941 
   2942 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b. Each 128-bit lane is a
        ; byte-align style concatenation: the top three words of %b followed by the low
        ; words of %a. The one deviation is result element 7, which uses index 12
        ; (upper lane of %a) where a lane-repeated mask would use 4.
   2943 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   2944 ; AVX1:       # BB#0:
   2945 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2946 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2947 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   2948 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   2949 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   2950 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   2951 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
   2952 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2953 ; AVX1-NEXT:    retq
   2954 ;
   2955 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   2956 ; AVX2:       # BB#0:
   2957 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
   2958 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2959 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   2960 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2961 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2962 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2963 ; AVX2-NEXT:    retq
   2964   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
   2965   ret <16 x i16> %shuffle
   2966 }
   2967 
   2968 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Mask indices 0-15 select from %a, 16-31 from %b; "uu" in the name is undef.
        ; The defined elements repeat per 128-bit lane and are consistent with a
        ; byte-align of %b's top three words followed by %a's low five words. AVX2 is
        ; expected to emit a single 256-bit vpalignr; AVX1 one vpalignr per half.
   2969 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   2970 ; AVX1:       # BB#0:
   2971 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2972 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2973 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   2974 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   2975 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2976 ; AVX1-NEXT:    retq
   2977 ;
   2978 ; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   2979 ; AVX2:       # BB#0:
   2980 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
   2981 ; AVX2-NEXT:    retq
   2982   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   2983   ret <16 x i16> %shuffle
   2984 }
   2985 
   2986 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle: every mask index is below 16, so only %a is
        ; read. The upper half (13,14,15,8,9,10,11,12) is a rotation within
        ; that half, but lane 7 of the lower half takes element 12 from the
        ; upper half, so the lower half is not a pure in-half rotation. Both
        ; check sets expect: extract the upper half, blend its element 4 into
        ; the lower half, rotate each half with vpalignr, then re-insert.
   2987 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   2988 ; AVX1:       # BB#0:
   2989 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2990 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   2991 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2992 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   2993 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2994 ; AVX1-NEXT:    retq
   2995 ;
   2996 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   2997 ; AVX2:       # BB#0:
   2998 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2999 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3000 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3001 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3002 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3003 ; AVX2-NEXT:    retq
   3004   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
   3005   ret <16 x i16> %shuffle
   3006 }
   3007 
   3008 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle (all defined mask indices are below 16, so %b is
        ; never selected) with a mostly undef mask. The defined lanes fit a
        ; rotation of each 128-bit half of %a by five i16 elements. The AVX1
        ; checks expect one vpalignr per half plus a vinsertf128; the AVX2
        ; checks expect a single 256-bit vpalignr.
   3009 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3010 ; AVX1:       # BB#0:
   3011 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3012 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3013 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3014 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3015 ; AVX1-NEXT:    retq
   3016 ;
   3017 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3018 ; AVX2:       # BB#0:
   3019 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
   3020 ; AVX2-NEXT:    retq
   3021   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3022   ret <16 x i16> %shuffle
   3023 }
   3024 
   3025 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle where only lanes 4-6 and 12-14 are defined
        ; (elements 1-3 and 9-11 of %a); all other lanes are undef. The checks
        ; expect a per-half byte shift (vpslldq) by six bytes that fills the
        ; vacated low bytes with zeros, which lands in lanes the mask leaves
        ; undef. AVX1 does this per 128-bit half; AVX2 uses one 256-bit vpslldq.
   3026 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3027 ; AVX1:       # BB#0:
   3028 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3029 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3030 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3031 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3032 ; AVX1-NEXT:    retq
   3033 ;
   3034 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3035 ; AVX2:       # BB#0:
   3036 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
   3037 ; AVX2-NEXT:    retq
   3038   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3039   ret <16 x i16> %shuffle
   3040 }
   3041 
   3042 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle (mask indices 0-15 select from %a, 16-31 from %b).
        ; The upper half (27-31, 8-10) is a rotation of %b's upper half
        ; followed by %a's upper half. The lower half almost matches the same
        ; rotation, except lane 7 takes element 10 from %a's upper half instead
        ; of element 2. The AVX1 checks handle the two halves with different
        ; sequences; the AVX2 checks blend first and then rotate each half.
   3043 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3044 ; AVX1:       # BB#0:
   3045 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3046 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3047 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3048 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
   3049 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   3050 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3051 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
   3052 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3053 ; AVX1-NEXT:    retq
   3054 ;
   3055 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3056 ; AVX2:       # BB#0:
   3057 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
   3058 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3059 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3060 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3061 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3062 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3063 ; AVX2-NEXT:    retq
   3064   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
   3065   ret <16 x i16> %shuffle
   3066 }
   3067 
   3068 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle with a partially undef mask (mask indices 0-15
        ; select from %a, 16-31 from %b). The defined lanes fit a rotation
        ; inside each 128-bit half: the top five i16 elements of %b's half
        ; followed by the low three of %a's half. The AVX1 checks expect one
        ; vpalignr per half plus a vinsertf128; the AVX2 checks expect a single
        ; 256-bit vpalignr.
   3069 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   3070 ; AVX1:       # BB#0:
   3071 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3072 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3073 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3074 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   3075 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3076 ; AVX1-NEXT:    retq
   3077 ;
   3078 ; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   3079 ; AVX2:       # BB#0:
   3080 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
   3081 ; AVX2-NEXT:    retq
   3082   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
   3083   ret <16 x i16> %shuffle
   3084 }
   3085 
   3086 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle: every mask index is below 16, so only %a is
        ; read. The upper half (11-15, 8-10) is a rotation within that half,
        ; but lane 7 of the lower half takes element 10 from the upper half, so
        ; the lower half is not a pure in-half rotation. Both check sets
        ; expect: extract the upper half, blend its element 2 into the lower
        ; half, rotate each half with vpalignr, then re-insert.
   3087 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   3088 ; AVX1:       # BB#0:
   3089 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3090 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3091 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3092 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3093 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3094 ; AVX1-NEXT:    retq
   3095 ;
   3096 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   3097 ; AVX2:       # BB#0:
   3098 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3099 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3100 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3101 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3102 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3103 ; AVX2-NEXT:    retq
   3104   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
   3105   ret <16 x i16> %shuffle
   3106 }
   3107 
   3108 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle (all defined mask indices are below 16, so %b is
        ; never selected) with a partially undef mask. The defined lanes fit a
        ; rotation of each 128-bit half of %a by three i16 elements. The AVX1
        ; checks expect one vpalignr per half plus a vinsertf128; the AVX2
        ; checks expect a single 256-bit vpalignr.
   3109 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   3110 ; AVX1:       # BB#0:
   3111 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3112 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3113 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3114 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3115 ; AVX1-NEXT:    retq
   3116 ;
   3117 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   3118 ; AVX2:       # BB#0:
   3119 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
   3120 ; AVX2-NEXT:    retq
   3121   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
   3122   ret <16 x i16> %shuffle
   3123 }
   3124 
   3125 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Single-input shuffle where only lanes 1-3 and 9-11 are defined
        ; (elements 4-6 and 12-14 of %a); all other lanes are undef. The checks
        ; expect a per-half byte shift (vpsrldq) by six bytes that fills the
        ; vacated high bytes with zeros, which lands in lanes the mask leaves
        ; undef. AVX1 does this per 128-bit half; AVX2 uses one 256-bit vpsrldq.
   3126 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   3127 ; AVX1:       # BB#0:
   3128 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3129 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3130 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3131 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3132 ; AVX1-NEXT:    retq
   3133 ;
   3134 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   3135 ; AVX2:       # BB#0:
   3136 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
   3137 ; AVX2-NEXT:    retq
   3138   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
   3139   ret <16 x i16> %shuffle
   3140 }
   3141 
   3142 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle (mask indices 0-15 select from %a, 16-31 from %b).
        ; The upper half (11-15, 24-26) is a rotation of %a's upper half
        ; followed by %b's upper half. The lower half almost matches the same
        ; rotation, except lane 7 takes element 26 from %b's upper half instead
        ; of element 18. The AVX1 checks handle the two halves with different
        ; sequences; the AVX2 checks blend first and then rotate each half.
   3143 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   3144 ; AVX1:       # BB#0:
   3145 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3146 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3147 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3148 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   3149 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   3150 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3151 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
   3152 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3153 ; AVX1-NEXT:    retq
   3154 ;
   3155 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   3156 ; AVX2:       # BB#0:
   3157 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
   3158 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3159 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3160 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3161 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   3162 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3163 ; AVX2-NEXT:    retq
   3164   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
   3165   ret <16 x i16> %shuffle
   3166 }
   3167 
   3168 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle with a partially undef mask (mask indices 0-15
        ; select from %a, 16-31 from %b). The defined lanes fit a rotation
        ; inside each 128-bit half: the top five i16 elements of %a's half
        ; followed by the low three of %b's half. The AVX1 checks expect one
        ; vpalignr per half plus a vinsertf128; the AVX2 checks expect a single
        ; 256-bit vpalignr.
   3169 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   3170 ; AVX1:       # BB#0:
   3171 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3172 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3173 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3174 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   3175 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3176 ; AVX1-NEXT:    retq
   3177 ;
   3178 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   3179 ; AVX2:       # BB#0:
   3180 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
   3181 ; AVX2-NEXT:    retq
   3182   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
   3183   ret <16 x i16> %shuffle
   3184 }
   3185 
   3186 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle (mask indices 0-15 select from %a, 16-31 from %b).
        ; The upper half (13-15, 24-28) is a rotation of %a's upper half
        ; followed by %b's upper half. The lower half almost matches the same
        ; rotation, except lane 7 takes element 28 from %b's upper half instead
        ; of element 20. The AVX1 checks handle the two halves with different
        ; sequences; the AVX2 checks blend first and then rotate each half.
   3187 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   3188 ; AVX1:       # BB#0:
   3189 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3190 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3191 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3192 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
   3193 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
   3194 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   3195 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7]
   3196 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3197 ; AVX1-NEXT:    retq
   3198 ;
   3199 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   3200 ; AVX2:       # BB#0:
   3201 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
   3202 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   3203 ; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3204 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3205 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3206 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   3207 ; AVX2-NEXT:    retq
   3208   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
   3209   ret <16 x i16> %shuffle
   3210 }
   3211 
   3212 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle with a mostly undef mask (mask indices 0-15 select
        ; from %a, 16-31 from %b). The defined lanes fit a rotation inside each
        ; 128-bit half: the top three i16 elements of %a's half followed by the
        ; low five of %b's half. The AVX1 checks expect one vpalignr per half
        ; plus a vinsertf128; the AVX2 checks expect a single 256-bit vpalignr.
   3213 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   3214 ; AVX1:       # BB#0:
   3215 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3216 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3217 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3218 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   3219 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3220 ; AVX1-NEXT:    retq
   3221 ;
   3222 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   3223 ; AVX2:       # BB#0:
   3224 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
   3225 ; AVX2-NEXT:    retq
   3226   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
   3227   ret <16 x i16> %shuffle
   3228 }
   3229 
   3230 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
        ; Two-input shuffle with a partially undef mask (mask indices 0-15
        ; select from %a, 16-31 from %b). The upper-half mask is the lower-half
        ; mask with 8 added to every defined index, so both 128-bit halves get
        ; the same in-half pattern. The AVX1 checks expect the same
        ; vpslldq/vpshufhw/vpunpckhdq sequence on each half; the AVX2 checks
        ; expect one 256-bit vpblendw followed by one 256-bit vpshufb.
   3231 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   3232 ; AVX1:       # BB#0:
   3233 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3234 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   3235 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3236 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
   3237 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   3238 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   3239 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   3240 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   3241 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3242 ; AVX1-NEXT:    retq
   3243 ;
   3244 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   3245 ; AVX2:       # BB#0:
   3246 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
   3247 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
   3248 ; AVX2-NEXT:    retq
   3249   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
   3250   ret <16 x i16> %shuffle
   3251 }
   3252 
   3253 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
        ; Splat of element 3 of %a into the upper eight lanes; the lower eight
        ; lanes are undef. Both check sets expect a 128-bit vpshufb that
        ; replicates bytes 6-7 (element 3) across the low half, followed by an
        ; insert of that result into the upper half, so the undef lower lanes
        ; end up holding the same splat.
   3254 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   3255 ; AVX1:       # BB#0:
   3256 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
   3257 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3258 ; AVX1-NEXT:    retq
   3259 ;
   3260 ; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   3261 ; AVX2:       # BB#0:
   3262 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
   3263 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   3264 ; AVX2-NEXT:    retq
   3265   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   3266   ret <16 x i16> %shuffle
   3267 }
   3268 
   3269 define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) {
        ; Splat of element 8 of %a (the first element of its upper 128-bit
        ; half) into all sixteen lanes. Both check sets expect the upper half
        ; to be extracted first, its element 0 replicated across 128 bits
        ; (vpshufb in the AVX1 checks, a 128-bit vpbroadcastw in the AVX2
        ; checks), and the result inserted into the upper half as well.
   3270 ; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
   3271 ; AVX1:       # BB#0:
   3272 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3273 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   3274 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3275 ; AVX1-NEXT:    retq
   3276 ;
   3277 ; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
   3278 ; AVX2:       # BB#0:
   3279 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   3280 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
   3281 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   3282 ; AVX2-NEXT:    retq
   3283   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   3284   ret <16 x i16> %shuffle
   3285 }
   3286 
   3287 define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
        ; Splat of element 9 of %a (element 1 of its upper 128-bit half) into
        ; the lower eight lanes; the upper eight lanes are undef. Both check
        ; sets expect the upper half to be extracted and bytes 2-3 replicated
        ; with a 128-bit vpshufb, with no insert back into the upper half.
   3288 ; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   3289 ; AVX1:       # BB#0:
   3290 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3291 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   3292 ; AVX1-NEXT:    retq
   3293 ;
   3294 ; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   3295 ; AVX2:       # BB#0:
   3296 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   3297 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   3298 ; AVX2-NEXT:    retq
   3299   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   3300   ret <16 x i16> %shuffle
   3301 }
   3302 
   3303 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
        ; Loads one i16 from %ptr and inserts it as element 0 of an otherwise
        ; all-zero <16 x i16>. The checks use the shared "ALL" prefix, so the
        ; same output is expected from both RUN configurations: a
        ; zero-extending scalar load (movzwl) followed by a vmovd into xmm0,
        ; with no separate zeroing or insert instruction.
   3304 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
   3305 ; ALL:       # BB#0:
   3306 ; ALL-NEXT:    movzwl (%rdi), %eax
   3307 ; ALL-NEXT:    vmovd %eax, %xmm0
   3308 ; ALL-NEXT:    retq
   3309   %val = load i16, i16* %ptr
   3310   %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
   3311   ret <16 x i16> %i0
   3312 }
   3313 
   3314 define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
        ; Builds the result as a concatenation of two extracted 128-bit
        ; subvectors: the low half of %a and the high half of %b. The checks
        ; expect the extract/concat chain to collapse into a single 256-bit
        ; blend (vblendpd in the AVX1 checks, vpblendd in the AVX2 checks).
   3315 ; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
   3316 ; AVX1:       # BB#0:
   3317 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
   3318 ; AVX1-NEXT:    retq
   3319 ;
   3320 ; AVX2-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
   3321 ; AVX2:       # BB#0:
   3322 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   3323 ; AVX2-NEXT:    retq
          ; Elements 0-7 of %a as an <8 x i16>.
   3324   %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
          ; Elements 8-15 of %b as an <8 x i16>.
   3325   %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
          ; Identity mask over both <8 x i16> operands, i.e. %alo followed by %bhi.
   3326   %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   3327   ret <16 x i16> %shuf
   3328 }
   3329 
   3330 define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
        ; Concatenates the high halves of %a and %b, with the concatenation
        ; performed on <16 x i8> bitcasts of the halves and the result bitcast
        ; back to <16 x i16>. The checks use the shared "ALL" prefix, so both
        ; RUN configurations are expected to emit a single vperm2f128 despite
        ; the intervening bitcasts.
   3331 ; ALL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
   3332 ; ALL:       # BB#0:
   3333 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   3334 ; ALL-NEXT:    retq
          ; Elements 8-15 of each input as <8 x i16>.
   3335   %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   3336   %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
          ; Reinterpret both halves as bytes before concatenating.
   3337   %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
   3338   %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
          ; Identity mask over both <16 x i8> operands, i.e. %bc0hi followed by %bc1hi.
   3339   %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   3340   %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
   3341   ret <16 x i16> %shuffle16
   3342 }
   3343 
   3344 define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
        ; Loads an i32, places it in element 0 of a zero <4 x i32>, views that
        ; as <8 x i16>, and splats i16 element 0 to all sixteen lanes. The AVX1
        ; checks expect a vmovd load, a vpshufb splat and a vinsertf128; the
        ; AVX2 checks expect the load to be folded into a single vpbroadcastw
        ; from (%rdi).
   3345 ; AVX1-LABEL: insert_dup_mem_v16i16_i32:
   3346 ; AVX1:       # BB#0:
   3347 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   3348 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   3349 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3350 ; AVX1-NEXT:    retq
   3351 ;
   3352 ; AVX2-LABEL: insert_dup_mem_v16i16_i32:
   3353 ; AVX2:       # BB#0:
   3354 ; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
   3355 ; AVX2-NEXT:    retq
   3356   %tmp = load i32, i32* %ptr, align 4
   3357   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   3358   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
          ; A zeroinitializer mask selects element 0 for every result lane.
   3359   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
   3360   ret <16 x i16> %tmp3
   3361 }
   3362 
   3363 define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
        ; Loads an i16, sign-extends it to i32, places it in element 0 of a
        ; zero <4 x i32>, views that as <8 x i16>, and splats i16 element 0 to
        ; all sixteen lanes. Both check sets expect the sign-extending scalar
        ; load (movswl) and vmovd to remain; the splat is vpshufb plus
        ; vinsertf128 in the AVX1 checks and a register-source vpbroadcastw in
        ; the AVX2 checks.
   3364 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
   3365 ; AVX1:       # BB#0:
   3366 ; AVX1-NEXT:    movswl (%rdi), %eax
   3367 ; AVX1-NEXT:    vmovd %eax, %xmm0
   3368 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   3369 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3370 ; AVX1-NEXT:    retq
   3371 ;
   3372 ; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
   3373 ; AVX2:       # BB#0:
   3374 ; AVX2-NEXT:    movswl (%rdi), %eax
   3375 ; AVX2-NEXT:    vmovd %eax, %xmm0
   3376 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
   3377 ; AVX2-NEXT:    retq
   3378   %tmp = load i16, i16* %ptr, align 2
   3379   %tmp1 = sext i16 %tmp to i32
   3380   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   3381   %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
          ; A zeroinitializer mask selects element 0 for every result lane.
   3382   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
   3383   ret <16 x i16> %tmp4
   3384 }
   3385 
   3386 define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
        ; Loads an i32, places it in element 0 of a zero <4 x i32>, views that
        ; as <8 x i16>, and splats i16 element 1 to all sixteen lanes. The AVX1
        ; checks expect a vmovd load and a vpshufb replicating bytes 2-3; the
        ; AVX2 checks expect a single vpbroadcastw reading from byte offset 2
        ; of %ptr.
        ; NOTE(review): the function references attribute group #0, whose
        ; definition is not visible in this part of the file - confirm that it
        ; is defined (or intentionally empty).
   3387 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
   3388 ; AVX1:       # BB#0:
   3389 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   3390 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   3391 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3392 ; AVX1-NEXT:    retq
   3393 ;
   3394 ; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
   3395 ; AVX2:       # BB#0:
   3396 ; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
   3397 ; AVX2-NEXT:    retq
   3398   %tmp = load i32, i32* %ptr, align 4
   3399   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   3400   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   3401   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   3402   ret <16 x i16> %tmp3
   3403 }
   3404 
   3405 define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
        ; Loads an i32, places it in element 1 of a zero <4 x i32>, views that
        ; as <8 x i16>, and splats i16 element 3 to all sixteen lanes. The
        ; checks expect the insert position to be folded away: the AVX1 checks
        ; load the value with vmovd and replicate bytes 2-3 with vpshufb, and
        ; the AVX2 checks use a single vpbroadcastw reading from byte offset 2
        ; of %ptr.
        ; NOTE(review): the function references attribute group #0, whose
        ; definition is not visible in this part of the file - confirm that it
        ; is defined (or intentionally empty).
   3406 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
   3407 ; AVX1:       # BB#0:
   3408 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   3409 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   3410 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   3411 ; AVX1-NEXT:    retq
   3412 ;
   3413 ; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
   3414 ; AVX2:       # BB#0:
   3415 ; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
   3416 ; AVX2-NEXT:    retq
   3417   %tmp = load i32, i32* %ptr, align 4
   3418   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
   3419   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   3420   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   3421   ret <16 x i16> %tmp3
   3422 }
   3423