Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
      7 
      8 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
      9 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     10 ; AVX1:       # %bb.0:
     11 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
     12 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
     13 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     14 ; AVX1-NEXT:    retq
     15 ;
     16 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     17 ; AVX2OR512VL:       # %bb.0:
     18 ; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %ymm0
     19 ; AVX2OR512VL-NEXT:    retq
     20   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     21   ret <16 x i16> %shuffle
     22 }
     23 
     24 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
     25 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     26 ; AVX1:       # %bb.0:
     27 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
     28 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
     29 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
     30 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
     31 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     32 ; AVX1-NEXT:    retq
     33 ;
     34 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     35 ; AVX2-SLOW:       # %bb.0:
     36 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
     37 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
     38 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     39 ; AVX2-SLOW-NEXT:    retq
     40 ;
     41 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     42 ; AVX2-FAST:       # %bb.0:
     43 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
     44 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     45 ; AVX2-FAST-NEXT:    retq
     46 ;
     47 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     48 ; AVX512VL-SLOW:       # %bb.0:
     49 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
     50 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
     51 ; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     52 ; AVX512VL-SLOW-NEXT:    retq
     53 ;
     54 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
     55 ; AVX512VL-FAST:       # %bb.0:
     56 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
     57 ; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     58 ; AVX512VL-FAST-NEXT:    retq
     59   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     60   ret <16 x i16> %shuffle
     61 }
     62 
     63 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
     64 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     65 ; AVX1:       # %bb.0:
     66 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
     67 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
     68 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
     69 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
     70 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     71 ; AVX1-NEXT:    retq
     72 ;
     73 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     74 ; AVX2-SLOW:       # %bb.0:
     75 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
     76 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
     77 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     78 ; AVX2-SLOW-NEXT:    retq
     79 ;
     80 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     81 ; AVX2-FAST:       # %bb.0:
     82 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
     83 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     84 ; AVX2-FAST-NEXT:    retq
     85 ;
     86 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     87 ; AVX512VL-SLOW:       # %bb.0:
     88 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
     89 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
     90 ; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     91 ; AVX512VL-SLOW-NEXT:    retq
     92 ;
     93 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
     94 ; AVX512VL-FAST:       # %bb.0:
     95 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
     96 ; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
     97 ; AVX512VL-FAST-NEXT:    retq
     98   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     99   ret <16 x i16> %shuffle
    100 }
    101 
    102 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    103 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
    104 ; AVX1:       # %bb.0:
    105 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
    106 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
    107 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
    108 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    109 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    110 ; AVX1-NEXT:    retq
    111 ;
    112 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
    113 ; AVX2-SLOW:       # %bb.0:
    114 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
    115 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    116 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
    117 ; AVX2-SLOW-NEXT:    retq
    118 ;
    119 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
    120 ; AVX2-FAST:       # %bb.0:
    121 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    122 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
    123 ; AVX2-FAST-NEXT:    retq
    124 ;
    125 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
    126 ; AVX512VL-SLOW:       # %bb.0:
    127 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
    128 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    129 ; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
    130 ; AVX512VL-SLOW-NEXT:    retq
    131 ;
    132 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
    133 ; AVX512VL-FAST:       # %bb.0:
    134 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    135 ; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
    136 ; AVX512VL-FAST-NEXT:    retq
    137   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    138   ret <16 x i16> %shuffle
    139 }
    140 
    141 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    142 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
    143 ; AVX1:       # %bb.0:
    144 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
    145 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
    146 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    147 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    148 ; AVX1-NEXT:    retq
    149 ;
    150 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
    151 ; AVX2OR512VL:       # %bb.0:
    152 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9]
    153 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
    154 ; AVX2OR512VL-NEXT:    retq
    155   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    156   ret <16 x i16> %shuffle
    157 }
    158 
    159 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    160 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
    161 ; AVX1:       # %bb.0:
    162 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
    163 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
    164 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    165 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    166 ; AVX1-NEXT:    retq
    167 ;
    168 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
    169 ; AVX2OR512VL:       # %bb.0:
    170 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1]
    171 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
    172 ; AVX2OR512VL-NEXT:    retq
    173   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    174   ret <16 x i16> %shuffle
    175 }
    176 
    177 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    178 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    179 ; AVX1:       # %bb.0:
    180 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
    181 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
    182 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    183 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    184 ; AVX1-NEXT:    retq
    185 ;
    186 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    187 ; AVX2OR512VL:       # %bb.0:
    188 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1]
    189 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
    190 ; AVX2OR512VL-NEXT:    retq
    191   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    192   ret <16 x i16> %shuffle
    193 }
    194 
    195 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    196 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    197 ; AVX1:       # %bb.0:
    198 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
    199 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
    200 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    201 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    202 ; AVX1-NEXT:    retq
    203 ;
    204 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    205 ; AVX2OR512VL:       # %bb.0:
    206 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
    207 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
    208 ; AVX2OR512VL-NEXT:    retq
    209   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    210   ret <16 x i16> %shuffle
    211 }
    212 
    213 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    214 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    215 ; AVX1:       # %bb.0:
    216 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    217 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    218 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
    219 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
    220 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    221 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    222 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    223 ; AVX1-NEXT:    retq
    224 ;
    225 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    226 ; AVX2-SLOW:       # %bb.0:
    227 ; AVX2-SLOW-NEXT:    vpbroadcastw %xmm0, %xmm1
    228 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
    229 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
    230 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
    231 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
    232 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    233 ; AVX2-SLOW-NEXT:    retq
    234 ;
    235 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    236 ; AVX2-FAST:       # %bb.0:
    237 ; AVX2-FAST-NEXT:    vpbroadcastw %xmm0, %xmm1
    238 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
    239 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    240 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
    241 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    242 ; AVX2-FAST-NEXT:    retq
    243 ;
    244 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
    245 ; AVX512VL:       # %bb.0:
    246 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
    247 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    248 ; AVX512VL-NEXT:    retq
    249   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    250   ret <16 x i16> %shuffle
    251 }
    252 
    253 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    254 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
    255 ; AVX1:       # %bb.0:
    256 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    257 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    258 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
    259 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
    260 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    261 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    262 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    263 ; AVX1-NEXT:    retq
    264 ;
    265 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
    266 ; AVX2:       # %bb.0:
    267 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
    268 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
    269 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
    270 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    271 ; AVX2-NEXT:    retq
    272 ;
    273 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
    274 ; AVX512VL:       # %bb.0:
    275 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
    276 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    277 ; AVX512VL-NEXT:    retq
    278   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    279   ret <16 x i16> %shuffle
    280 }
    281 
    282 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    283 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    284 ; AVX1:       # %bb.0:
    285 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    286 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    287 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
    288 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    289 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    290 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    291 ; AVX1-NEXT:    retq
    292 ;
    293 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    294 ; AVX2-SLOW:       # %bb.0:
    295 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
    296 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    297 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    298 ; AVX2-SLOW-NEXT:    retq
    299 ;
    300 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    301 ; AVX2-FAST:       # %bb.0:
    302 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,5,6,7,0,1,2,3]
    303 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    304 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    305 ; AVX2-FAST-NEXT:    retq
    306 ;
    307 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
    308 ; AVX512VL:       # %bb.0:
    309 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
    310 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    311 ; AVX512VL-NEXT:    retq
    312   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    313   ret <16 x i16> %shuffle
    314 }
    315 
    316 define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    317 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    318 ; AVX1:       # %bb.0:
    319 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    320 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    321 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
    322 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    323 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    324 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    325 ; AVX1-NEXT:    retq
    326 ;
    327 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    328 ; AVX2-SLOW:       # %bb.0:
    329 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
    330 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    331 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    332 ; AVX2-SLOW-NEXT:    retq
    333 ;
    334 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    335 ; AVX2-FAST:       # %bb.0:
    336 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,5,6,7,0,1,2,3]
    337 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    338 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    339 ; AVX2-FAST-NEXT:    retq
    340 ;
    341 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
    342 ; AVX512VL:       # %bb.0:
    343 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
    344 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    345 ; AVX512VL-NEXT:    retq
    346   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    347   ret <16 x i16> %shuffle
    348 }
    349 
    350 define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    351 ; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
    352 ; AVX1:       # %bb.0:
    353 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    354 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    355 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    356 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    357 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    358 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    359 ; AVX1-NEXT:    retq
    360 ;
    361 ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
    362 ; AVX2:       # %bb.0:
    363 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
    364 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    365 ; AVX2-NEXT:    retq
    366 ;
    367 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
    368 ; AVX512VL:       # %bb.0:
    369 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
    370 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    371 ; AVX512VL-NEXT:    retq
    372   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    373   ret <16 x i16> %shuffle
    374 }
    375 
    376 define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    377 ; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
    378 ; AVX1:       # %bb.0:
    379 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    380 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    381 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    382 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    383 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    384 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    385 ; AVX1-NEXT:    retq
    386 ;
    387 ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
    388 ; AVX2:       # %bb.0:
    389 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
    390 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    391 ; AVX2-NEXT:    retq
    392 ;
    393 ; AVX512VL-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
    394 ; AVX512VL:       # %bb.0:
    395 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
    396 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    397 ; AVX512VL-NEXT:    retq
    398   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    399   ret <16 x i16> %shuffle
    400 }
    401 
    402 define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    403 ; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    404 ; AVX1:       # %bb.0:
    405 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    406 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    407 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    408 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    409 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    410 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    411 ; AVX1-NEXT:    retq
    412 ;
    413 ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    414 ; AVX2:       # %bb.0:
    415 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
    416 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    417 ; AVX2-NEXT:    retq
    418 ;
    419 ; AVX512VL-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    420 ; AVX512VL:       # %bb.0:
    421 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
    422 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    423 ; AVX512VL-NEXT:    retq
    424   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    425   ret <16 x i16> %shuffle
    426 }
    427 
    428 define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    429 ; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    430 ; AVX1:       # %bb.0:
    431 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    432 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    433 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    434 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    435 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    436 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    437 ; AVX1-NEXT:    retq
    438 ;
    439 ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    440 ; AVX2:       # %bb.0:
    441 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
    442 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    443 ; AVX2-NEXT:    retq
    444 ;
    445 ; AVX512VL-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
    446 ; AVX512VL:       # %bb.0:
    447 ; AVX512VL-NEXT:    movl $15, %eax
    448 ; AVX512VL-NEXT:    vmovd %eax, %xmm1
    449 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
    450 ; AVX512VL-NEXT:    retq
    451   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    452   ret <16 x i16> %shuffle
    453 }
    454 
    455 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
    456 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    457 ; AVX1:       # %bb.0:
    458 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
    459 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
    460 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    461 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    462 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    463 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    464 ; AVX1-NEXT:    retq
    465 ;
    466 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    467 ; AVX2-SLOW:       # %bb.0:
    468 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
    469 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
    470 ; AVX2-SLOW-NEXT:    retq
    471 ;
    472 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    473 ; AVX2-FAST:       # %bb.0:
    474 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    475 ; AVX2-FAST-NEXT:    retq
    476 ;
    477 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    478 ; AVX512VL-SLOW:       # %bb.0:
    479 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
    480 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
    481 ; AVX512VL-SLOW-NEXT:    retq
    482 ;
    483 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    484 ; AVX512VL-FAST:       # %bb.0:
    485 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
    486 ; AVX512VL-FAST-NEXT:    retq
    487   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    488   ret <16 x i16> %shuffle
    489 }
    490 
    491 define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
    492 ; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    493 ; AVX1:       # %bb.0:
    494 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,6,7]
    495 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,2]
    496 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    497 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,6,7]
    498 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
    499 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    500 ; AVX1-NEXT:    retq
    501 ;
    502 ; AVX2-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    503 ; AVX2-SLOW:       # %bb.0:
    504 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
    505 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
    506 ; AVX2-SLOW-NEXT:    retq
    507 ;
    508 ; AVX2-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    509 ; AVX2-FAST:       # %bb.0:
    510 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
    511 ; AVX2-FAST-NEXT:    retq
    512 ;
    513 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    514 ; AVX512VL-SLOW:       # %bb.0:
    515 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
    516 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
    517 ; AVX512VL-SLOW-NEXT:    retq
    518 ;
    519 ; AVX512VL-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
    520 ; AVX512VL-FAST:       # %bb.0:
    521 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
    522 ; AVX512VL-FAST-NEXT:    retq
    523   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
    524   ret <16 x i16> %shuffle
    525 }
    526 
    527 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
    528 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    529 ; AVX1:       # %bb.0:
    530 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
    531 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
    532 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    533 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    534 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    535 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    536 ; AVX1-NEXT:    retq
    537 ;
    538 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    539 ; AVX2-SLOW:       # %bb.0:
    540 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
    541 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
    542 ; AVX2-SLOW-NEXT:    retq
    543 ;
    544 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    545 ; AVX2-FAST:       # %bb.0:
    546 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
    547 ; AVX2-FAST-NEXT:    retq
    548 ;
    549 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    550 ; AVX512VL-SLOW:       # %bb.0:
    551 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
    552 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
    553 ; AVX512VL-SLOW-NEXT:    retq
    554 ;
    555 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    556 ; AVX512VL-FAST:       # %bb.0:
    557 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
    558 ; AVX512VL-FAST-NEXT:    retq
    559   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
    560   ret <16 x i16> %shuffle
    561 }
    562 
    563 define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
    564 ; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    565 ; AVX1:       # %bb.0:
    566 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
    567 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
    568 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    569 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
    570 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
    571 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    572 ; AVX1-NEXT:    retq
    573 ;
    574 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    575 ; AVX2-SLOW:       # %bb.0:
    576 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
    577 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
    578 ; AVX2-SLOW-NEXT:    retq
    579 ;
    580 ; AVX2-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    581 ; AVX2-FAST:       # %bb.0:
    582 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
    583 ; AVX2-FAST-NEXT:    retq
    584 ;
    585 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    586 ; AVX512VL-SLOW:       # %bb.0:
    587 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
    588 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
    589 ; AVX512VL-SLOW-NEXT:    retq
    590 ;
    591 ; AVX512VL-FAST-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
    592 ; AVX512VL-FAST:       # %bb.0:
    593 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
    594 ; AVX512VL-FAST-NEXT:    retq
    595   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
    596   ret <16 x i16> %shuffle
    597 }
    598 
    599 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
    600 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    601 ; AVX1:       # %bb.0:
    602 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
    603 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
    604 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    605 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
    606 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
    607 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    608 ; AVX1-NEXT:    retq
    609 ;
    610 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    611 ; AVX2-SLOW:       # %bb.0:
    612 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
    613 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
    614 ; AVX2-SLOW-NEXT:    retq
    615 ;
    616 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    617 ; AVX2-FAST:       # %bb.0:
    618 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
    619 ; AVX2-FAST-NEXT:    retq
    620 ;
    621 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    622 ; AVX512VL-SLOW:       # %bb.0:
    623 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
    624 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
    625 ; AVX512VL-SLOW-NEXT:    retq
    626 ;
    627 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
    628 ; AVX512VL-FAST:       # %bb.0:
    629 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
    630 ; AVX512VL-FAST-NEXT:    retq
    631   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
    632   ret <16 x i16> %shuffle
    633 }
    634 
    635 define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
    636 ; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    637 ; AVX1:       # %bb.0:
    638 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
    639 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
    640 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    641 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
    642 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
    643 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    644 ; AVX1-NEXT:    retq
    645 ;
    646 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    647 ; AVX2-SLOW:       # %bb.0:
    648 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
    649 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
    650 ; AVX2-SLOW-NEXT:    retq
    651 ;
    652 ; AVX2-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    653 ; AVX2-FAST:       # %bb.0:
    654 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
    655 ; AVX2-FAST-NEXT:    retq
    656 ;
    657 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    658 ; AVX512VL-SLOW:       # %bb.0:
    659 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
    660 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
    661 ; AVX512VL-SLOW-NEXT:    retq
    662 ;
    663 ; AVX512VL-FAST-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
    664 ; AVX512VL-FAST:       # %bb.0:
    665 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
    666 ; AVX512VL-FAST-NEXT:    retq
    667   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
    668   ret <16 x i16> %shuffle
    669 }
    670 
    671 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
    672 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    673 ; AVX1:       # %bb.0:
    674 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
    675 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
    676 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    677 ; AVX1-NEXT:    retq
    678 ;
    679 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    680 ; AVX2-SLOW:       # %bb.0:
    681 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
    682 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
    683 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    684 ; AVX2-SLOW-NEXT:    retq
    685 ;
    686 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    687 ; AVX2-FAST:       # %bb.0:
    688 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    689 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    690 ; AVX2-FAST-NEXT:    retq
    691 ;
    692 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    693 ; AVX512VL-SLOW:       # %bb.0:
    694 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
    695 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
    696 ; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    697 ; AVX512VL-SLOW-NEXT:    retq
    698 ;
    699 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
    700 ; AVX512VL-FAST:       # %bb.0:
    701 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
    702 ; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    703 ; AVX512VL-FAST-NEXT:    retq
    704   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
    705   ret <16 x i16> %shuffle
    706 }
    707 
    708 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
    709 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    710 ; AVX1:       # %bb.0:
    711 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
    712 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    713 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    714 ; AVX1-NEXT:    retq
    715 ;
    716 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    717 ; AVX2-SLOW:       # %bb.0:
    718 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
    719 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    720 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    721 ; AVX2-SLOW-NEXT:    retq
    722 ;
    723 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    724 ; AVX2-FAST:       # %bb.0:
    725 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    726 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    727 ; AVX2-FAST-NEXT:    retq
    728 ;
    729 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    730 ; AVX512VL-SLOW:       # %bb.0:
    731 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
    732 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    733 ; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    734 ; AVX512VL-SLOW-NEXT:    retq
    735 ;
    736 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
    737 ; AVX512VL-FAST:       # %bb.0:
    738 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
    739 ; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    740 ; AVX512VL-FAST-NEXT:    retq
    741   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
    742   ret <16 x i16> %shuffle
    743 }
    744 
    745 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    746 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    747 ; AVX1:       # %bb.0:
    748 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
    749 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    750 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    751 ; AVX1-NEXT:    retq
    752 ;
    753 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    754 ; AVX2-SLOW:       # %bb.0:
    755 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
    756 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    757 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    758 ; AVX2-SLOW-NEXT:    retq
    759 ;
    760 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    761 ; AVX2-FAST:       # %bb.0:
    762 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    763 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    764 ; AVX2-FAST-NEXT:    retq
    765 ;
    766 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    767 ; AVX512VL-SLOW:       # %bb.0:
    768 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
    769 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    770 ; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    771 ; AVX512VL-SLOW-NEXT:    retq
    772 ;
    773 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
    774 ; AVX512VL-FAST:       # %bb.0:
    775 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
    776 ; AVX512VL-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    777 ; AVX512VL-FAST-NEXT:    retq
    778   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    779   ret <16 x i16> %shuffle
    780 }
    781 
    782 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    783 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
    784 ; AVX1:       # %bb.0:
    785 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    786 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    787 ; AVX1-NEXT:    retq
    788 ;
    789 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
    790 ; AVX2OR512VL:       # %bb.0:
    791 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
    792 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    793 ; AVX2OR512VL-NEXT:    retq
    794   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    795   ret <16 x i16> %shuffle
    796 }
    797 
    798 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    799 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
    800 ; AVX1:       # %bb.0:
    801 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    802 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    803 ; AVX1-NEXT:    retq
    804 ;
    805 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
    806 ; AVX2OR512VL:       # %bb.0:
    807 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
    808 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    809 ; AVX2OR512VL-NEXT:    retq
    810   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    811   ret <16 x i16> %shuffle
    812 }
    813 
    814 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    815 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    816 ; AVX1:       # %bb.0:
    817 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    818 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    819 ; AVX1-NEXT:    retq
    820 ;
    821 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
    822 ; AVX2OR512VL:       # %bb.0:
    823 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
    824 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    825 ; AVX2OR512VL-NEXT:    retq
    826   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    827   ret <16 x i16> %shuffle
    828 }
    829 
    830 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
    831 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    832 ; AVX1:       # %bb.0:
    833 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    834 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    835 ; AVX1-NEXT:    retq
    836 ;
    837 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
    838 ; AVX2OR512VL:       # %bb.0:
    839 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    840 ; AVX2OR512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
    841 ; AVX2OR512VL-NEXT:    retq
    842   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    843   ret <16 x i16> %shuffle
    844 }
    845 
    846 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
    847 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    848 ; AVX1:       # %bb.0:
    849 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
    850 ; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
    851 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
    852 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
    853 ; AVX1-NEXT:    retq
    854 ;
    855 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    856 ; AVX2OR512VL:       # %bb.0:
    857 ; AVX2OR512VL-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
    858 ; AVX2OR512VL-NEXT:    retq
    859   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    860   ret <16 x i16> %shuffle
    861 }
    862 
    863 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
    864 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
    865 ; AVX1:       # %bb.0:
    866 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
    867 ; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
    868 ; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
    869 ; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
    870 ; AVX1-NEXT:    retq
    871 ;
    872 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
    873 ; AVX2OR512VL:       # %bb.0:
    874 ; AVX2OR512VL-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
    875 ; AVX2OR512VL-NEXT:    retq
    876   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
    877   ret <16 x i16> %shuffle
    878 }
    879 
    880 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
    881 ; ALL-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
    882 ; ALL:       # %bb.0:
    883 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    884 ; ALL-NEXT:    retq
    885   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
    886   ret <16 x i16> %shuffle
    887 }
    888 
    889 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    890 ; ALL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
    891 ; ALL:       # %bb.0:
    892 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
    893 ; ALL-NEXT:    retq
    894   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
    895   ret <16 x i16> %shuffle
    896 }
    897 
    898 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
    899 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
    900 ; AVX1:       # %bb.0:
    901 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,0]
    902 ; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
    903 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
    904 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
    905 ; AVX1-NEXT:    retq
    906 ;
    907 ; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
    908 ; AVX2:       # %bb.0:
    909 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
    910 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    911 ; AVX2-NEXT:    retq
    912 ;
    913 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
    914 ; AVX512VL:       # %bb.0:
    915 ; AVX512VL-NEXT:    movw $-32768, %ax # imm = 0x8000
    916 ; AVX512VL-NEXT:    kmovd %eax, %k1
    917 ; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
    918 ; AVX512VL-NEXT:    retq
    919   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
    920   ret <16 x i16> %shuffle
    921 }
    922 
    923 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
    924 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
    925 ; AVX1:       # %bb.0:
    926 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
    927 ; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
    928 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
    929 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
    930 ; AVX1-NEXT:    retq
    931 ;
    932 ; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
    933 ; AVX2:       # %bb.0:
    934 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
    935 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    936 ; AVX2-NEXT:    retq
    937 ;
    938 ; AVX512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
    939 ; AVX512VL:       # %bb.0:
    940 ; AVX512VL-NEXT:    movw $1, %ax
    941 ; AVX512VL-NEXT:    kmovd %eax, %k1
    942 ; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
    943 ; AVX512VL-NEXT:    retq
    944   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    945   ret <16 x i16> %shuffle
    946 }
    947 
    948 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
    949 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
    950 ; AVX1:       # %bb.0:
    951 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,0,65535,0,65535,0,65535,0,65535]
    952 ; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
    953 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
    954 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
    955 ; AVX1-NEXT:    retq
    956 ;
    957 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
    958 ; AVX2:       # %bb.0:
    959 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
    960 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    961 ; AVX2-NEXT:    retq
    962 ;
    963 ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
    964 ; AVX512VL:       # %bb.0:
    965 ; AVX512VL-NEXT:    movw $21930, %ax # imm = 0x55AA
    966 ; AVX512VL-NEXT:    kmovd %eax, %k1
    967 ; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
    968 ; AVX512VL-NEXT:    retq
    969   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
    970   ret <16 x i16> %shuffle
    971 }
    972 
    973 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
    974 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
    975 ; AVX1:       # %bb.0:
    976 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [0,65535,0,65535,0,65535,0,65535,65535,0,65535,0,65535,0,65535,0]
    977 ; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
    978 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
    979 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
    980 ; AVX1-NEXT:    retq
    981 ;
    982 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
    983 ; AVX2:       # %bb.0:
    984 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
    985 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
    986 ; AVX2-NEXT:    retq
    987 ;
    988 ; AVX512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
    989 ; AVX512VL:       # %bb.0:
    990 ; AVX512VL-NEXT:    movw $-21931, %ax # imm = 0xAA55
    991 ; AVX512VL-NEXT:    kmovd %eax, %k1
    992 ; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
    993 ; AVX512VL-NEXT:    retq
    994   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    995   ret <16 x i16> %shuffle
    996 }
    997 
    998 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
    999 ; ALL-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
   1000 ; ALL:       # %bb.0:
   1001 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
   1002 ; ALL-NEXT:    retq
   1003   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
   1004   ret <16 x i16> %shuffle
   1005 }
   1006 
   1007 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
   1008 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
   1009 ; AVX1:       # %bb.0:
   1010 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1011 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1012 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1013 ; AVX1-NEXT:    retq
   1014 ;
   1015 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
   1016 ; AVX2OR512VL:       # %bb.0:
   1017 ; AVX2OR512VL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1018 ; AVX2OR512VL-NEXT:    vpbroadcastd %xmm0, %ymm0
   1019 ; AVX2OR512VL-NEXT:    retq
   1020   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
   1021   ret <16 x i16> %shuffle
   1022 }
   1023 
   1024 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
   1025 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
   1026 ; AVX1:       # %bb.0:
   1027 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1028 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
   1029 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1030 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1031 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1032 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1033 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1034 ; AVX1-NEXT:    retq
   1035 ;
   1036 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
   1037 ; AVX2-SLOW:       # %bb.0:
   1038 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   1039 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
   1040 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
   1041 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   1042 ; AVX2-SLOW-NEXT:    retq
   1043 ;
   1044 ; AVX2-FAST-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
   1045 ; AVX2-FAST:       # %bb.0:
   1046 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
   1047 ; AVX2-FAST-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   1048 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   1049 ; AVX2-FAST-NEXT:    retq
   1050 ;
   1051 ; AVX512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
   1052 ; AVX512VL:       # %bb.0:
   1053 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24]
   1054 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1055 ; AVX512VL-NEXT:    retq
   1056   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
   1057   ret <16 x i16> %shuffle
   1058 }
   1059 
   1060 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
   1061 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
   1062 ; AVX1:       # %bb.0:
   1063 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1064 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1065 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
   1066 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
   1067 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1068 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1069 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1070 ; AVX1-NEXT:    retq
   1071 ;
   1072 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
   1073 ; AVX2:       # %bb.0:
   1074 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1075 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1076 ; AVX2-NEXT:    retq
   1077 ;
   1078 ; AVX512VL-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
   1079 ; AVX512VL:       # %bb.0:
   1080 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31]
   1081 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   1082 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   1083 ; AVX512VL-NEXT:    retq
   1084   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
   1085   ret <16 x i16> %shuffle
   1086 }
   1087 
   1088 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
   1089 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
   1090 ; AVX1:       # %bb.0:
   1091 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1092 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
   1093 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1094 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
   1095 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
   1096 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
   1097 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
   1098 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1099 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1100 ; AVX1-NEXT:    retq
   1101 ;
   1102 ; AVX2-SLOW-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
   1103 ; AVX2-SLOW:       # %bb.0:
   1104 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1105 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
   1106 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
   1107 ; AVX2-SLOW-NEXT:    retq
   1108 ;
   1109 ; AVX2-FAST-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
   1110 ; AVX2-FAST:       # %bb.0:
   1111 ; AVX2-FAST-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1112 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,14,15,12,13,10,11,8,9,22,23,20,21,18,19,16,17,30,31,28,29,26,27,24,25]
   1113 ; AVX2-FAST-NEXT:    retq
   1114 ;
   1115 ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
   1116 ; AVX512VL:       # %bb.0:
   1117 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28]
   1118 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   1119 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   1120 ; AVX512VL-NEXT:    retq
   1121   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
   1122   ret <16 x i16> %shuffle
   1123 }
   1124 
   1125 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
   1126 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
   1127 ; AVX1:       # %bb.0:
   1128 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1129 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   1130 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   1131 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
   1132 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   1133 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   1134 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
   1135 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1136 ; AVX1-NEXT:    retq
   1137 ;
   1138 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
   1139 ; AVX2:       # %bb.0:
   1140 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
   1141 ; AVX2-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
   1142 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
   1143 ; AVX2-NEXT:    retq
   1144 ;
   1145 ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
   1146 ; AVX512VL:       # %bb.0:
   1147 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24]
   1148 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   1149 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   1150 ; AVX512VL-NEXT:    retq
   1151   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
   1152   ret <16 x i16> %shuffle
   1153 }
   1154 
   1155 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
   1156 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
   1157 ; AVX1:       # %bb.0:
   1158 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
   1159 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
   1160 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1161 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,0,4,5,6,7]
   1162 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
   1163 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1164 ; AVX1-NEXT:    retq
   1165 ;
   1166 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
   1167 ; AVX2-SLOW:       # %bb.0:
   1168 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
   1169 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1170 ; AVX2-SLOW-NEXT:    retq
   1171 ;
   1172 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
   1173 ; AVX2-FAST:       # %bb.0:
   1174 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
   1175 ; AVX2-FAST-NEXT:    retq
   1176 ;
   1177 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
   1178 ; AVX512VL-SLOW:       # %bb.0:
   1179 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,1,0,4,5,6,7,8,8,9,8,12,13,14,15]
   1180 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1181 ; AVX512VL-SLOW-NEXT:    retq
   1182 ;
   1183 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
   1184 ; AVX512VL-FAST:       # %bb.0:
   1185 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
   1186 ; AVX512VL-FAST-NEXT:    retq
   1187   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
   1188   ret <16 x i16> %shuffle
   1189 }
   1190 
   1191 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
   1192 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
   1193 ; AVX1:       # %bb.0:
   1194 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
   1195 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
   1196 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1197 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7]
   1198 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
   1199 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1200 ; AVX1-NEXT:    retq
   1201 ;
   1202 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
   1203 ; AVX2-SLOW:       # %bb.0:
   1204 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
   1205 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
   1206 ; AVX2-SLOW-NEXT:    retq
   1207 ;
   1208 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
   1209 ; AVX2-FAST:       # %bb.0:
   1210 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
   1211 ; AVX2-FAST-NEXT:    retq
   1212 ;
   1213 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
   1214 ; AVX512VL-SLOW:       # %bb.0:
   1215 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,2,4,5,6,7,8,8,8,10,12,13,14,15]
   1216 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
   1217 ; AVX512VL-SLOW-NEXT:    retq
   1218 ;
   1219 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
   1220 ; AVX512VL-FAST:       # %bb.0:
   1221 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
   1222 ; AVX512VL-FAST-NEXT:    retq
   1223   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
   1224   ret <16 x i16> %shuffle
   1225 }
   1226 
   1227 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1228 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
   1229 ; AVX1:       # %bb.0:
   1230 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
   1231 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
   1232 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1233 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7]
   1234 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
   1235 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1236 ; AVX1-NEXT:    retq
   1237 ;
   1238 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
   1239 ; AVX2-SLOW:       # %bb.0:
   1240 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
   1241 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
   1242 ; AVX2-SLOW-NEXT:    retq
   1243 ;
   1244 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
   1245 ; AVX2-FAST:       # %bb.0:
   1246 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
   1247 ; AVX2-FAST-NEXT:    retq
   1248 ;
   1249 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
   1250 ; AVX512VL-SLOW:       # %bb.0:
   1251 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,0,4,5,6,7,8,8,11,8,12,13,14,15]
   1252 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,5,4]
   1253 ; AVX512VL-SLOW-NEXT:    retq
   1254 ;
   1255 ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
   1256 ; AVX512VL-FAST:       # %bb.0:
   1257 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
   1258 ; AVX512VL-FAST-NEXT:    retq
   1259   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
   1260   ret <16 x i16> %shuffle
   1261 }
   1262 
   1263 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1264 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
   1265 ; AVX1:       # %bb.0:
   1266 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1267 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
   1268 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   1269 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1270 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1271 ; AVX1-NEXT:    retq
   1272 ;
   1273 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
   1274 ; AVX2OR512VL:       # %bb.0:
   1275 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
   1276 ; AVX2OR512VL-NEXT:    retq
   1277   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
   1278   ret <16 x i16> %shuffle
   1279 }
   1280 
   1281 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1282 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
   1283 ; AVX1:       # %bb.0:
   1284 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1285 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
   1286 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   1287 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1288 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1289 ; AVX1-NEXT:    retq
   1290 ;
   1291 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
   1292 ; AVX2OR512VL:       # %bb.0:
   1293 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
   1294 ; AVX2OR512VL-NEXT:    retq
   1295   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
   1296   ret <16 x i16> %shuffle
   1297 }
   1298 
   1299 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1300 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
   1301 ; AVX1:       # %bb.0:
   1302 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1303 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
   1304 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   1305 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1306 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1307 ; AVX1-NEXT:    retq
   1308 ;
   1309 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
   1310 ; AVX2OR512VL:       # %bb.0:
   1311 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
   1312 ; AVX2OR512VL-NEXT:    retq
   1313   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1314   ret <16 x i16> %shuffle
   1315 }
   1316 
   1317 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1318 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
   1319 ; AVX1:       # %bb.0:
   1320 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1321 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1322 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   1323 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1324 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1325 ; AVX1-NEXT:    retq
   1326 ;
   1327 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
   1328 ; AVX2OR512VL:       # %bb.0:
   1329 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
   1330 ; AVX2OR512VL-NEXT:    retq
   1331   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1332   ret <16 x i16> %shuffle
   1333 }
   1334 
   1335 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
   1336 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
   1337 ; AVX1:       # %bb.0:
   1338 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1339 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1340 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   1341 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1342 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1343 ; AVX1-NEXT:    retq
   1344 ;
   1345 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
   1346 ; AVX2OR512VL:       # %bb.0:
   1347 ; AVX2OR512VL-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
   1348 ; AVX2OR512VL-NEXT:    retq
   1349   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   1350   ret <16 x i16> %shuffle
   1351 }
   1352 
   1353 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   1354 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
   1355 ; AVX1:       # %bb.0:
   1356 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1357 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1358 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   1359 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1360 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1361 ; AVX1-NEXT:    retq
   1362 ;
   1363 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
   1364 ; AVX2OR512VL:       # %bb.0:
   1365 ; AVX2OR512VL-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
   1366 ; AVX2OR512VL-NEXT:    retq
   1367   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   1368   ret <16 x i16> %shuffle
   1369 }
   1370 
   1371 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   1372 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
   1373 ; AVX1:       # %bb.0:
   1374 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1375 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1376 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   1377 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1378 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1379 ; AVX1-NEXT:    retq
   1380 ;
   1381 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
   1382 ; AVX2:       # %bb.0:
   1383 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31]
   1384 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
   1385 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   1386 ; AVX2-NEXT:    retq
   1387 ;
   1388 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
   1389 ; AVX512VL:       # %bb.0:
   1390 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31]
   1391 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1392 ; AVX512VL-NEXT:    retq
   1393   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   1394   ret <16 x i16> %shuffle
   1395 }
   1396 
   1397 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
   1398 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
   1399 ; AVX1:       # %bb.0:
   1400 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1401 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1402 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   1403 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1404 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1405 ; AVX1-NEXT:    retq
   1406 ;
   1407 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
   1408 ; AVX2:       # %bb.0:
   1409 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
   1410 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
   1411 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   1412 ; AVX2-NEXT:    retq
   1413 ;
   1414 ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
   1415 ; AVX512VL:       # %bb.0:
   1416 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27]
   1417 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1418 ; AVX512VL-NEXT:    retq
   1419   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   1420   ret <16 x i16> %shuffle
   1421 }
   1422 
   1423 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1424 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1425 ; AVX1:       # %bb.0:
   1426 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,0,4,5,6,7]
   1427 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,1]
   1428 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1429 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,0,0,4,5,6,7]
   1430 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,1]
   1431 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1432 ; AVX1-NEXT:    retq
   1433 ;
   1434 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
   1435 ; AVX2OR512VL:       # %bb.0:
   1436 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
   1437 ; AVX2OR512VL-NEXT:    retq
   1438   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   1439   ret <16 x i16> %shuffle
   1440 }
   1441 
   1442 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1443 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1444 ; AVX1:       # %bb.0:
   1445 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,2,4,5,6,7]
   1446 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
   1447 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1448 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,0,4,5,6,7]
   1449 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
   1450 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1451 ; AVX1-NEXT:    retq
   1452 ;
   1453 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
   1454 ; AVX2OR512VL:       # %bb.0:
   1455 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
   1456 ; AVX2OR512VL-NEXT:    retq
   1457   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
   1458   ret <16 x i16> %shuffle
   1459 }
   1460 
   1461 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1462 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1463 ; AVX1:       # %bb.0:
   1464 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,3,0,4,5,6,7]
   1465 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,0]
   1466 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1467 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
   1468 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
   1469 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1470 ; AVX1-NEXT:    retq
   1471 ;
   1472 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
   1473 ; AVX2OR512VL:       # %bb.0:
   1474 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
   1475 ; AVX2OR512VL-NEXT:    retq
   1476   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
   1477   ret <16 x i16> %shuffle
   1478 }
   1479 
   1480 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   1481 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1482 ; AVX1:       # %bb.0:
   1483 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
   1484 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1485 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
   1486 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1487 ; AVX1-NEXT:    retq
   1488 ;
   1489 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
   1490 ; AVX2OR512VL:       # %bb.0:
   1491 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
   1492 ; AVX2OR512VL-NEXT:    retq
   1493   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
   1494   ret <16 x i16> %shuffle
   1495 }
   1496 
   1497 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
   1498 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1499 ; AVX1:       # %bb.0:
   1500 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
   1501 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1502 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
   1503 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1504 ; AVX1-NEXT:    retq
   1505 ;
   1506 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
   1507 ; AVX2OR512VL:       # %bb.0:
   1508 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
   1509 ; AVX2OR512VL-NEXT:    retq
   1510   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
   1511   ret <16 x i16> %shuffle
   1512 }
   1513 
   1514 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
   1515 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1516 ; AVX1:       # %bb.0:
   1517 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
   1518 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1519 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
   1520 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1521 ; AVX1-NEXT:    retq
   1522 ;
   1523 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
   1524 ; AVX2OR512VL:       # %bb.0:
   1525 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
   1526 ; AVX2OR512VL-NEXT:    retq
   1527   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
   1528   ret <16 x i16> %shuffle
   1529 }
   1530 
   1531 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
   1532 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1533 ; AVX1:       # %bb.0:
   1534 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1535 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1536 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
   1537 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1538 ; AVX1-NEXT:    retq
   1539 ;
   1540 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
   1541 ; AVX2OR512VL:       # %bb.0:
   1542 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
   1543 ; AVX2OR512VL-NEXT:    retq
   1544   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
   1545   ret <16 x i16> %shuffle
   1546 }
   1547 
   1548 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
   1549 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1550 ; AVX1:       # %bb.0:
   1551 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
   1552 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1553 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1554 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
   1555 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1556 ; AVX1-NEXT:    retq
   1557 ;
   1558 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
   1559 ; AVX2OR512VL:       # %bb.0:
   1560 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
   1561 ; AVX2OR512VL-NEXT:    retq
   1562   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
   1563   ret <16 x i16> %shuffle
   1564 }
   1565 
   1566 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1567 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1568 ; AVX1:       # %bb.0:
   1569 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   1570 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1571 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1572 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1573 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1574 ; AVX1-NEXT:    retq
   1575 ;
   1576 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
   1577 ; AVX2OR512VL:       # %bb.0:
   1578 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1579 ; AVX2OR512VL-NEXT:    retq
   1580   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   1581   ret <16 x i16> %shuffle
   1582 }
   1583 
   1584 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
   1585 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1586 ; AVX1:       # %bb.0:
   1587 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
   1588 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
   1589 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1590 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
   1591 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1592 ; AVX1-NEXT:    retq
   1593 ;
   1594 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
   1595 ; AVX2OR512VL:       # %bb.0:
   1596 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
   1597 ; AVX2OR512VL-NEXT:    retq
   1598   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
   1599   ret <16 x i16> %shuffle
   1600 }
   1601 
   1602 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
   1603 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1604 ; AVX1:       # %bb.0:
   1605 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
   1606 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1607 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
   1608 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1609 ; AVX1-NEXT:    retq
   1610 ;
   1611 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
   1612 ; AVX2OR512VL:       # %bb.0:
   1613 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
   1614 ; AVX2OR512VL-NEXT:    retq
   1615   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
   1616   ret <16 x i16> %shuffle
   1617 }
   1618 
   1619 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
   1620 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1621 ; AVX1:       # %bb.0:
   1622 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
   1623 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
   1624 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1625 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
   1626 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1627 ; AVX1-NEXT:    retq
   1628 ;
   1629 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
   1630 ; AVX2OR512VL:       # %bb.0:
   1631 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
   1632 ; AVX2OR512VL-NEXT:    retq
   1633   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
   1634   ret <16 x i16> %shuffle
   1635 }
   1636 
   1637 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   1638 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1639 ; AVX1:       # %bb.0:
   1640 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,6,7]
   1641 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,2,3]
   1642 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1643 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
   1644 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1645 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1646 ; AVX1-NEXT:    retq
   1647 ;
   1648 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
   1649 ; AVX2OR512VL:       # %bb.0:
   1650 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
   1651 ; AVX2OR512VL-NEXT:    retq
   1652   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
   1653   ret <16 x i16> %shuffle
   1654 }
   1655 
   1656 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
   1657 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1658 ; AVX1:       # %bb.0:
   1659 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1660 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1661 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1662 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1663 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1664 ; AVX1-NEXT:    retq
   1665 ;
   1666 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1667 ; AVX2-SLOW:       # %bb.0:
   1668 ; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1669 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1670 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1671 ; AVX2-SLOW-NEXT:    retq
   1672 ;
   1673 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1674 ; AVX2-FAST:       # %bb.0:
   1675 ; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1676 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1677 ; AVX2-FAST-NEXT:    retq
   1678 ;
   1679 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
   1680 ; AVX512VL:       # %bb.0:
   1681 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20]
   1682 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1683 ; AVX512VL-NEXT:    retq
   1684   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1685   ret <16 x i16> %shuffle
   1686 }
   1687 
   1688 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
   1689 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1690 ; AVX1:       # %bb.0:
   1691 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1692 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1693 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1694 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1695 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1696 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1697 ; AVX1-NEXT:    retq
   1698 ;
   1699 ; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1700 ; AVX2-SLOW:       # %bb.0:
   1701 ; AVX2-SLOW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
   1702 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1703 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1704 ; AVX2-SLOW-NEXT:    retq
   1705 ;
   1706 ; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1707 ; AVX2-FAST:       # %bb.0:
   1708 ; AVX2-FAST-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
   1709 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1710 ; AVX2-FAST-NEXT:    retq
   1711 ;
   1712 ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
   1713 ; AVX512VL:       # %bb.0:
   1714 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20]
   1715 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1716 ; AVX512VL-NEXT:    retq
   1717   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
   1718   ret <16 x i16> %shuffle
   1719 }
   1720 
   1721 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
   1722 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1723 ; AVX1:       # %bb.0:
   1724 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1725 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1726 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1727 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1728 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1729 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1730 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1731 ; AVX1-NEXT:    retq
   1732 ;
   1733 ; AVX2-SLOW-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1734 ; AVX2-SLOW:       # %bb.0:
   1735 ; AVX2-SLOW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1736 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1737 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1738 ; AVX2-SLOW-NEXT:    retq
   1739 ;
   1740 ; AVX2-FAST-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1741 ; AVX2-FAST:       # %bb.0:
   1742 ; AVX2-FAST-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1743 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1744 ; AVX2-FAST-NEXT:    retq
   1745 ;
   1746 ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
   1747 ; AVX512VL:       # %bb.0:
   1748 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28]
   1749 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1750 ; AVX512VL-NEXT:    retq
   1751   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1752   ret <16 x i16> %shuffle
   1753 }
   1754 
   1755 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
   1756 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1757 ; AVX1:       # %bb.0:
   1758 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1759 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1760 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   1761 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   1762 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   1763 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1764 ; AVX1-NEXT:    retq
   1765 ;
   1766 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1767 ; AVX2-SLOW:       # %bb.0:
   1768 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1769 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   1770 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   1771 ; AVX2-SLOW-NEXT:    retq
   1772 ;
   1773 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1774 ; AVX2-FAST:       # %bb.0:
   1775 ; AVX2-FAST-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1776 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   1777 ; AVX2-FAST-NEXT:    retq
   1778 ;
   1779 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
   1780 ; AVX512VL:       # %bb.0:
   1781 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28]
   1782 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1783 ; AVX512VL-NEXT:    retq
   1784   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
   1785   ret <16 x i16> %shuffle
   1786 }
   1787 
   1788 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
   1789 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1790 ; AVX1:       # %bb.0:
   1791 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1792 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1793 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1794 ; AVX1-NEXT:    retq
   1795 ;
   1796 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1797 ; AVX2:       # %bb.0:
   1798 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1799 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1800 ; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1801 ; AVX2-NEXT:    retq
   1802 ;
   1803 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
   1804 ; AVX512VL:       # %bb.0:
   1805 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23]
   1806 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   1807 ; AVX512VL-NEXT:    retq
   1808   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1809   ret <16 x i16> %shuffle
   1810 }
   1811 
   1812 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
   1813 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1814 ; AVX1:       # %bb.0:
   1815 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1816 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1817 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
   1818 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1819 ; AVX1-NEXT:    retq
   1820 ;
   1821 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
   1822 ; AVX2OR512VL:       # %bb.0:
   1823 ; AVX2OR512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
   1824 ; AVX2OR512VL-NEXT:    retq
   1825   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
   1826   ret <16 x i16> %shuffle
   1827 }
   1828 
   1829 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
   1830 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1831 ; AVX1:       # %bb.0:
   1832 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1833 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1834 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1835 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1836 ; AVX1-NEXT:    retq
   1837 ;
   1838 ; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
   1839 ; AVX2OR512VL:       # %bb.0:
   1840 ; AVX2OR512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
   1841 ; AVX2OR512VL-NEXT:    retq
   1842   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
   1843   ret <16 x i16> %shuffle
   1844 }
   1845 
   1846 define <16 x i16> @shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13(<16 x i16> %a) {
   1847 ; AVX1-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
   1848 ; AVX1:       # %bb.0:
   1849 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1850 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11]
   1851 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   1852 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1853 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1854 ; AVX1-NEXT:    retq
   1855 ;
   1856 ; AVX2OR512VL-LABEL: shuffle_v16i16_06_07_01_02_07_00_04_05_14_15_09_10_15_08_12_13:
   1857 ; AVX2OR512VL:       # %bb.0:
   1858 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[12,13,14,15,2,3,4,5,14,15,0,1,8,9,10,11,28,29,30,31,18,19,20,21,30,31,16,17,24,25,26,27]
   1859 ; AVX2OR512VL-NEXT:    retq
   1860   %1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 6, i32 7, i32 1, i32 2, i32 7, i32 0, i32 4, i32 5, i32 14, i32 15, i32 9, i32 10, i32 15, i32 8, i32 12, i32 13>
   1861   ret <16 x i16> %1
   1862 }
   1863 
   1864 ;
   1865 ; Shuffle to logical bit shifts
   1866 ;
   1867 
   1868 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
   1869 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1870 ; AVX1:       # %bb.0:
   1871 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm1
   1872 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1873 ; AVX1-NEXT:    vpslld $16, %xmm0, %xmm0
   1874 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1875 ; AVX1-NEXT:    retq
   1876 ;
   1877 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1878 ; AVX2OR512VL:       # %bb.0:
   1879 ; AVX2OR512VL-NEXT:    vpslld $16, %ymm0, %ymm0
   1880 ; AVX2OR512VL-NEXT:    retq
   1881   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
   1882   ret <16 x i16> %shuffle
   1883 }
   1884 
   1885 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
   1886 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1887 ; AVX1:       # %bb.0:
   1888 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm1
   1889 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1890 ; AVX1-NEXT:    vpsllq $48, %xmm0, %xmm0
   1891 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1892 ; AVX1-NEXT:    retq
   1893 ;
   1894 ; AVX2OR512VL-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1895 ; AVX2OR512VL:       # %bb.0:
   1896 ; AVX2OR512VL-NEXT:    vpsllq $48, %ymm0, %ymm0
   1897 ; AVX2OR512VL-NEXT:    retq
   1898   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
   1899   ret <16 x i16> %shuffle
   1900 }
   1901 
   1902 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
   1903 ; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1904 ; AVX1:       # %bb.0:
   1905 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
   1906 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1907 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
   1908 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1909 ; AVX1-NEXT:    retq
   1910 ;
   1911 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
   1912 ; AVX2OR512VL:       # %bb.0:
   1913 ; AVX2OR512VL-NEXT:    vpsrld $16, %ymm0, %ymm0
   1914 ; AVX2OR512VL-NEXT:    retq
   1915   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
   1916   ret <16 x i16> %shuffle
   1917 }
   1918 
   1919 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
   1920 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1921 ; AVX1:       # %bb.0:
   1922 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1923 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   1924 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1925 ; AVX1-NEXT:    retq
   1926 ;
   1927 ; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
   1928 ; AVX2OR512VL:       # %bb.0:
   1929 ; AVX2OR512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
   1930 ; AVX2OR512VL-NEXT:    retq
   1931   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
   1932   ret <16 x i16> %shuffle
   1933 }
   1934 
   1935 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
   1936 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1937 ; AVX1:       # %bb.0:
   1938 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1939 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1940 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1941 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1942 ; AVX1-NEXT:    retq
   1943 ;
   1944 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
   1945 ; AVX2OR512VL:       # %bb.0:
   1946 ; AVX2OR512VL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1947 ; AVX2OR512VL-NEXT:    retq
   1948   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
   1949   ret <16 x i16> %shuffle
   1950 }
   1951 
   1952 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
   1953 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
   1954 ; AVX1:       # %bb.0:
   1955 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1956 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1957 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1958 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1959 ; AVX1-NEXT:    retq
   1960 ;
   1961 ; AVX2OR512VL-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
   1962 ; AVX2OR512VL:       # %bb.0:
   1963 ; AVX2OR512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1964 ; AVX2OR512VL-NEXT:    retq
   1965   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
   1966   ret <16 x i16> %shuffle
   1967 }
   1968 
   1969 define <16 x i16> @shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz(<16 x i16> %a) {
   1970 ; AVX1-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
   1971 ; AVX1:       # %bb.0:
   1972 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1973 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1974 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1975 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
   1976 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1977 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1978 ; AVX1-NEXT:    retq
   1979 ;
   1980 ; AVX2-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
   1981 ; AVX2:       # %bb.0:
   1982 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   1983 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1984 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1985 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
   1986 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1987 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
   1988 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1989 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
   1990 ; AVX2-NEXT:    retq
   1991 ;
   1992 ; AVX512VL-LABEL: shuffle_v16i16_28_zz_zz_zz_29_zz_zz_zz_30_zz_zz_zz_31_zz_zz_zz:
   1993 ; AVX512VL:       # %bb.0:
   1994 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [28,1,2,3,29,5,6,7,30,9,10,11,31,13,14,15]
   1995 ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1996 ; AVX512VL-NEXT:    vpermt2w %ymm0, %ymm2, %ymm1
   1997 ; AVX512VL-NEXT:    vmovdqa %ymm1, %ymm0
   1998 ; AVX512VL-NEXT:    retq
   1999   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 28, i32 0, i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 30, i32 0, i32 0, i32 0, i32 31, i32 0, i32 0, i32 0>
   2000   ret <16 x i16> %shuffle
   2001 }
   2002 
   2003 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
   2004 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   2005 ; AVX1:       # %bb.0:
   2006 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2007 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2008 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2009 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2010 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2011 ; AVX1-NEXT:    retq
   2012 ;
   2013 ; AVX2OR512VL-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
   2014 ; AVX2OR512VL:       # %bb.0:
   2015 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   2016 ; AVX2OR512VL-NEXT:    retq
   2017   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
   2018   ret <16 x i16> %shuffle
   2019 }
   2020 
   2021 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
   2022 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   2023 ; AVX1:       # %bb.0:
   2024 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2025 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2026 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   2027 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
   2028 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2029 ; AVX1-NEXT:    retq
   2030 ;
   2031 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
   2032 ; AVX2OR512VL:       # %bb.0:
   2033 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
   2034 ; AVX2OR512VL-NEXT:    retq
   2035   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
   2036   ret <16 x i16> %shuffle
   2037 }
   2038 
   2039 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
   2040 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
   2041 ; AVX1:       # %bb.0:
   2042 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2043 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2044 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
   2045 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
   2046 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2047 ; AVX1-NEXT:    retq
   2048 ;
   2049 ; AVX2OR512VL-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
   2050 ; AVX2OR512VL:       # %bb.0:
   2051 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
   2052 ; AVX2OR512VL-NEXT:    retq
   2053   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
   2054   ret <16 x i16> %shuffle
   2055 }
   2056 
   2057 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
   2058 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   2059 ; AVX1:       # %bb.0:
   2060 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2061 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2062 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2063 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2064 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2065 ; AVX1-NEXT:    retq
   2066 ;
   2067 ; AVX2OR512VL-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
   2068 ; AVX2OR512VL:       # %bb.0:
   2069 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   2070 ; AVX2OR512VL-NEXT:    retq
   2071   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
   2072   ret <16 x i16> %shuffle
   2073 }
   2074 
   2075 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
   2076 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   2077 ; AVX1:       # %bb.0:
   2078 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   2079 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   2080 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2081 ; AVX1-NEXT:    retq
   2082 ;
   2083 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   2084 ; AVX2:       # %bb.0:
   2085 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2086 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
   2087 ; AVX2-NEXT:    retq
   2088 ;
   2089 ; AVX512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
   2090 ; AVX512VL:       # %bb.0:
   2091 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16]
   2092 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   2093 ; AVX512VL-NEXT:    retq
   2094   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
   2095   ret <16 x i16> %shuffle
   2096 }
   2097 
   2098 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
   2099 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   2100 ; AVX1:       # %bb.0:
   2101 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2102 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   2103 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2104 ; AVX1-NEXT:    retq
   2105 ;
   2106 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   2107 ; AVX2:       # %bb.0:
   2108 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2109 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
   2110 ; AVX2-NEXT:    retq
   2111 ;
   2112 ; AVX512VL-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
   2113 ; AVX512VL:       # %bb.0:
   2114 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22]
   2115 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   2116 ; AVX512VL-NEXT:    retq
   2117   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
   2118   ret <16 x i16> %shuffle
   2119 }
   2120 
   2121 define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
   2122 ; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   2123 ; AVX1:       # %bb.0:
   2124 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2125 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2126 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
   2127 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
   2128 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   2129 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2130 ; AVX1-NEXT:    retq
   2131 ;
   2132 ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   2133 ; AVX2:       # %bb.0:
   2134 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2135 ; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
   2136 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
   2137 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2138 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2139 ; AVX2-NEXT:    retq
   2140 ;
   2141 ; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
   2142 ; AVX512VL:       # %bb.0:
   2143 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,0,1,2,3,2,11,8,9,8,9,10,11,10,11]
   2144 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2145 ; AVX512VL-NEXT:    retq
   2146   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
   2147   ret <16 x i16> %shuffle
   2148 }
   2149 
   2150 define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
   2151 ; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   2152 ; AVX1:       # %bb.0:
   2153 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2154 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
   2155 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   2156 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
   2157 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2158 ; AVX1-NEXT:    retq
   2159 ;
   2160 ; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   2161 ; AVX2:       # %bb.0:
   2162 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2163 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2164 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2165 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   2166 ; AVX2-NEXT:    retq
   2167 ;
   2168 ; AVX512VL-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
   2169 ; AVX512VL:       # %bb.0:
   2170 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,7,4,5,2,3,0,9,14,15,12,13,10,11,8,9]
   2171 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2172 ; AVX512VL-NEXT:    retq
   2173   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
   2174   ret <16 x i16> %shuffle
   2175 }
   2176 
   2177 define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
   2178 ; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   2179 ; AVX1:       # %bb.0:
   2180 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2181 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   2182 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
   2183 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   2184 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
   2185 ; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
   2186 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2187 ; AVX1-NEXT:    retq
   2188 ;
   2189 ; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   2190 ; AVX2:       # %bb.0:
   2191 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   2192 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2193 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2194 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2195 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
   2196 ; AVX2-NEXT:    retq
   2197 ;
   2198 ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
   2199 ; AVX512VL:       # %bb.0:
   2200 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27]
   2201 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   2202 ; AVX512VL-NEXT:    retq
   2203   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
   2204   ret <16 x i16> %shuffle
   2205 }
   2206 
   2207 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   2208 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   2209 ; AVX1:       # %bb.0:
   2210 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2211 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2212 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,1,4,5,6,7]
   2213 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
   2214 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
   2215 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
   2216 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2217 ; AVX1-NEXT:    retq
   2218 ;
   2219 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   2220 ; AVX2-SLOW:       # %bb.0:
   2221 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2222 ; AVX2-SLOW-NEXT:    vpbroadcastw %xmm1, %xmm1
   2223 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,3,4,5,6,7,8,8,10,11,12,13,14,15]
   2224 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   2225 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2226 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2227 ; AVX2-SLOW-NEXT:    retq
   2228 ;
   2229 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   2230 ; AVX2-FAST:       # %bb.0:
   2231 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2232 ; AVX2-FAST-NEXT:    vpbroadcastw %xmm1, %xmm1
   2233 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
   2234 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2235 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2236 ; AVX2-FAST-NEXT:    retq
   2237 ;
   2238 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
   2239 ; AVX512VL:       # %bb.0:
   2240 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,8]
   2241 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2242 ; AVX512VL-NEXT:    retq
   2243   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   2244   ret <16 x i16> %shuffle
   2245 }
   2246 
   2247 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2248 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   2249 ; AVX1:       # %bb.0:
   2250 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2251 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2252 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   2253 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
   2254 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2255 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
   2256 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
   2257 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2258 ; AVX1-NEXT:    retq
   2259 ;
   2260 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   2261 ; AVX2-SLOW:       # %bb.0:
   2262 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2263 ; AVX2-SLOW-NEXT:    vpsllq $48, %xmm1, %xmm1
   2264 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
   2265 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
   2266 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2267 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2268 ; AVX2-SLOW-NEXT:    retq
   2269 ;
   2270 ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   2271 ; AVX2-FAST:       # %bb.0:
   2272 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2273 ; AVX2-FAST-NEXT:    vpsllq $48, %xmm1, %xmm1
   2274 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
   2275 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2276 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2277 ; AVX2-FAST-NEXT:    retq
   2278 ;
   2279 ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
   2280 ; AVX512VL:       # %bb.0:
   2281 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,4,4,4,12,8,8,8,8,12,12,12,12]
   2282 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2283 ; AVX512VL-NEXT:    retq
   2284   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   2285   ret <16 x i16> %shuffle
   2286 }
   2287 
   2288 define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
   2289 ; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   2290 ; AVX1:       # %bb.0:
   2291 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2292 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2293 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2294 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2295 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2296 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2297 ; AVX1-NEXT:    retq
   2298 ;
   2299 ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   2300 ; AVX2:       # %bb.0:
   2301 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2302 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
   2303 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2304 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
   2305 ; AVX2-NEXT:    retq
   2306 ;
   2307 ; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
   2308 ; AVX512VL:       # %bb.0:
   2309 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,0,u,1,u,2,u,11,u,8,u,9,u,10,u,11>
   2310 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2311 ; AVX512VL-NEXT:    retq
   2312   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
   2313   ret <16 x i16> %shuffle
   2314 }
   2315 
   2316 define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
   2317 ; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   2318 ; AVX1:       # %bb.0:
   2319 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2320 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2321 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2322 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
   2323 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
   2324 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   2325 ; AVX1-NEXT:    retq
   2326 ;
   2327 ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   2328 ; AVX2:       # %bb.0:
   2329 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
   2330 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   2331 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7]
   2332 ; AVX2-NEXT:    retq
   2333 ;
   2334 ; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
   2335 ; AVX512VL:       # %bb.0:
   2336 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,4,u,5,u,6,u,15,u,12,u,13,u,14,u,15>
   2337 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2338 ; AVX512VL-NEXT:    retq
   2339   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
   2340   ret <16 x i16> %shuffle
   2341 }
   2342 
   2343 define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   2344 ; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   2345 ; AVX1:       # %bb.0:
   2346 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2347 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2348 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
   2349 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   2350 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
   2351 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   2352 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2353 ; AVX1-NEXT:    retq
   2354 ;
   2355 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   2356 ; AVX2-SLOW:       # %bb.0:
   2357 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2358 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2359 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2360 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,1,2,0,4,5,6,7,11,9,10,8,12,13,14,15]
   2361 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
   2362 ; AVX2-SLOW-NEXT:    retq
   2363 ;
   2364 ; AVX2-FAST-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   2365 ; AVX2-FAST:       # %bb.0:
   2366 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2367 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2368 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2369 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11,22,23,18,19,20,21,16,17,28,29,30,31,24,25,26,27]
   2370 ; AVX2-FAST-NEXT:    retq
   2371 ;
   2372 ; AVX512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
   2373 ; AVX512VL:       # %bb.0:
   2374 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,1,2,0,6,7,4,13,11,9,10,8,14,15,12,13]
   2375 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2376 ; AVX512VL-NEXT:    retq
   2377   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
   2378   ret <16 x i16> %shuffle
   2379 }
   2380 
   2381 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   2382 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   2383 ; AVX1:       # %bb.0:
   2384 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2385 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2386 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
   2387 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2388 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
   2389 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2390 ; AVX1-NEXT:    retq
   2391 ;
   2392 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   2393 ; AVX2:       # %bb.0:
   2394 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2395 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   2396 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,24,25,24,25,24,25,24,25,16,17,16,17,16,17,16,17]
   2397 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2398 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2399 ; AVX2-NEXT:    retq
   2400 ;
   2401 ; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
   2402 ; AVX512VL:       # %bb.0:
   2403 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,4,4,0,0,0,8,12,12,12,12,8,8,8,8]
   2404 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2405 ; AVX512VL-NEXT:    retq
   2406   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
   2407   ret <16 x i16> %shuffle
   2408 }
   2409 
   2410 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   2411 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   2412 ; AVX1:       # %bb.0:
   2413 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2414 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2415 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   2416 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
   2417 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2418 ; AVX1-NEXT:    retq
   2419 ;
   2420 ; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   2421 ; AVX2:       # %bb.0:
   2422 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2423 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2424 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2425 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   2426 ; AVX2-NEXT:    retq
   2427 ;
   2428 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
   2429 ; AVX512VL:       # %bb.0:
   2430 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,13,10,11,8,9,14,15,12,13]
   2431 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2432 ; AVX512VL-NEXT:    retq
   2433   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
   2434   ret <16 x i16> %shuffle
   2435 }
   2436 
   2437 define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
   2438 ; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   2439 ; AVX1:       # %bb.0:
   2440 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2441 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2442 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
   2443 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
   2444 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
   2445 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
   2446 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2447 ; AVX1-NEXT:    retq
   2448 ;
   2449 ; AVX2-SLOW-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   2450 ; AVX2-SLOW:       # %bb.0:
   2451 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2452 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255>
   2453 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2454 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[2,3,0,2,4,5,6,7,10,11,8,10,12,13,14,15]
   2455 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
   2456 ; AVX2-SLOW-NEXT:    retq
   2457 ;
   2458 ; AVX2-FAST-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   2459 ; AVX2-FAST:       # %bb.0:
   2460 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2461 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255>
   2462 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2463 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11,20,21,22,23,16,17,20,21,28,29,30,31,24,25,26,27]
   2464 ; AVX2-FAST-NEXT:    retq
   2465 ;
   2466 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
   2467 ; AVX512VL:       # %bb.0:
   2468 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,3,0,2,6,7,4,13,10,11,8,10,14,15,12,13]
   2469 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2470 ; AVX512VL-NEXT:    retq
   2471   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
   2472   ret <16 x i16> %shuffle
   2473 }
   2474 
   2475 define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
   2476 ; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   2477 ; AVX1:       # %bb.0:
   2478 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2479 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
   2480 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2481 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
   2482 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
   2483 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2484 ; AVX1-NEXT:    retq
   2485 ;
   2486 ; AVX2-SLOW-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   2487 ; AVX2-SLOW:       # %bb.0:
   2488 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2489 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   2490 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15]
   2491 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2492 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2493 ; AVX2-SLOW-NEXT:    retq
   2494 ;
   2495 ; AVX2-FAST-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   2496 ; AVX2-FAST:       # %bb.0:
   2497 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15,20,21,22,23,16,17,18,19,28,29,30,31,24,25,30,31]
   2498 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   2499 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2500 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   2501 ; AVX2-FAST-NEXT:    retq
   2502 ;
   2503 ; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
   2504 ; AVX512VL:       # %bb.0:
   2505 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,3,0,1,6,7,4,15,10,11,8,9,14,15,12,15]
   2506 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2507 ; AVX512VL-NEXT:    retq
   2508   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
   2509   ret <16 x i16> %shuffle
   2510 }
   2511 
   2512 define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
   2513 ; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   2514 ; AVX1:       # %bb.0:
   2515 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2516 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
   2517 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2518 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
   2519 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2520 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2521 ; AVX1-NEXT:    retq
   2522 ;
   2523 ; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   2524 ; AVX2:       # %bb.0:
   2525 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2526 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2527 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2528 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1,30,31,26,27,28,29,24,25,22,23,18,19,20,21,16,17]
   2529 ; AVX2-NEXT:    retq
   2530 ;
   2531 ; AVX512VL-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
   2532 ; AVX512VL:       # %bb.0:
   2533 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,5,6,4,3,1,2,8,15,13,14,12,11,9,10,8]
   2534 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2535 ; AVX512VL-NEXT:    retq
   2536   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
   2537   ret <16 x i16> %shuffle
   2538 }
   2539 
   2540 define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
   2541 ; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   2542 ; AVX1:       # %bb.0:
   2543 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2544 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2545 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
   2546 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2547 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
   2548 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2549 ; AVX1-NEXT:    retq
   2550 ;
   2551 ; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   2552 ; AVX2:       # %bb.0:
   2553 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2554 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   2555 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1,18,19,16,17,26,27,24,25,26,27,24,25,18,19,16,17]
   2556 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2557 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2558 ; AVX2-NEXT:    retq
   2559 ;
   2560 ; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
   2561 ; AVX512VL:       # %bb.0:
   2562 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,5,4,5,4,1,8,9,8,13,12,13,12,9,8]
   2563 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2564 ; AVX512VL-NEXT:    retq
   2565   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
   2566   ret <16 x i16> %shuffle
   2567 }
   2568 
   2569 define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
   2570 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   2571 ; AVX1:       # %bb.0:
   2572 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2573 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2574 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
   2575 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2576 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
   2577 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2578 ; AVX1-NEXT:    retq
   2579 ;
   2580 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   2581 ; AVX2:       # %bb.0:
   2582 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2583 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   2584 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1,26,27,24,25,18,19,16,17,26,27,24,25,18,19,16,17]
   2585 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2586 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2587 ; AVX2-NEXT:    retq
   2588 ;
   2589 ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
   2590 ; AVX512VL:       # %bb.0:
   2591 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,4,1,0,5,4,1,8,13,12,9,8,13,12,9,8]
   2592 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2593 ; AVX512VL-NEXT:    retq
   2594   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
   2595   ret <16 x i16> %shuffle
   2596 }
   2597 
   2598 define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
   2599 ; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   2600 ; AVX1:       # %bb.0:
   2601 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2602 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2603 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
   2604 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2605 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
   2606 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2607 ; AVX1-NEXT:    retq
   2608 ;
   2609 ; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   2610 ; AVX2:       # %bb.0:
   2611 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2612 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   2613 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9,26,27,24,25,18,19,16,17,18,19,16,17,26,27,24,25]
   2614 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2615 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2616 ; AVX2-NEXT:    retq
   2617 ;
   2618 ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
   2619 ; AVX512VL:       # %bb.0:
   2620 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,4,1,0,1,0,5,12,13,12,9,8,9,8,13,12]
   2621 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2622 ; AVX512VL-NEXT:    retq
   2623   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
   2624   ret <16 x i16> %shuffle
   2625 }
   2626 
   2627 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
   2628 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   2629 ; AVX1:       # %bb.0:
   2630 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2631 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2632 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
   2633 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2634 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
   2635 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2636 ; AVX1-NEXT:    retq
   2637 ;
   2638 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   2639 ; AVX2:       # %bb.0:
   2640 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2641 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   2642 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1,16,17,24,25,24,25,16,17,16,17,24,25,24,25,16,17]
   2643 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2644 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2645 ; AVX2-NEXT:    retq
   2646 ;
   2647 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
   2648 ; AVX512VL:       # %bb.0:
   2649 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,4,4,8,8,12,12,8,8,12,12,8]
   2650 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2651 ; AVX512VL-NEXT:    retq
   2652   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
   2653   ret <16 x i16> %shuffle
   2654 }
   2655 
   2656 define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
   2657 ; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   2658 ; AVX1:       # %bb.0:
   2659 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2660 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2661 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
   2662 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2663 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
   2664 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2665 ; AVX1-NEXT:    retq
   2666 ;
   2667 ; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   2668 ; AVX2:       # %bb.0:
   2669 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2670 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   2671 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9,24,25,16,17,16,17,24,25,24,25,16,17,16,17,24,25]
   2672 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2673 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2674 ; AVX2-NEXT:    retq
   2675 ;
   2676 ; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
   2677 ; AVX512VL:       # %bb.0:
   2678 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,0,0,4,4,0,0,12,12,8,8,12,12,8,8,12]
   2679 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2680 ; AVX512VL-NEXT:    retq
   2681   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
   2682   ret <16 x i16> %shuffle
   2683 }
   2684 
   2685 define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2686 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2687 ; AVX1:       # %bb.0:
   2688 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2689 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
   2690 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2691 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2692 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2693 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2694 ; AVX1-NEXT:    retq
   2695 ;
   2696 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2697 ; AVX2:       # %bb.0:
   2698 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2699 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2700 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2701 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7,20,21,28,29,24,25,16,17,26,27,18,19,30,31,22,23]
   2702 ; AVX2-NEXT:    retq
   2703 ;
   2704 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
   2705 ; AVX512VL:       # %bb.0:
   2706 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,6,4,0,5,1,7,11,10,14,12,8,13,9,15,11]
   2707 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2708 ; AVX512VL-NEXT:    retq
   2709   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
   2710   ret <16 x i16> %shuffle
   2711 }
   2712 
   2713 define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
   2714 ; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2715 ; AVX1:       # %bb.0:
   2716 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2717 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
   2718 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2719 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   2720 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2721 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2722 ; AVX1-NEXT:    retq
   2723 ;
   2724 ; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2725 ; AVX2:       # %bb.0:
   2726 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2727 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2728 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2729 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7,20,21,16,17,28,29,24,25,26,27,18,19,30,31,22,23]
   2730 ; AVX2-NEXT:    retq
   2731 ;
   2732 ; AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
   2733 ; AVX512VL:       # %bb.0:
   2734 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,0,6,4,5,1,7,11,10,8,14,12,13,9,15,11]
   2735 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2736 ; AVX512VL-NEXT:    retq
   2737   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
   2738   ret <16 x i16> %shuffle
   2739 }
   2740 
   2741 define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
   2742 ; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2743 ; AVX1:       # %bb.0:
   2744 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2745 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
   2746 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2747 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
   2748 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2749 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2750 ; AVX1-NEXT:    retq
   2751 ;
   2752 ; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2753 ; AVX2:       # %bb.0:
   2754 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2755 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2756 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2757 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11,20,21,28,29,24,25,16,17,18,19,22,23,30,31,26,27]
   2758 ; AVX2-NEXT:    retq
   2759 ;
   2760 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
   2761 ; AVX512VL:       # %bb.0:
   2762 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,6,4,0,1,3,7,13,10,14,12,8,9,11,15,13]
   2763 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2764 ; AVX512VL-NEXT:    retq
   2765   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
   2766   ret <16 x i16> %shuffle
   2767 }
   2768 
   2769 define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
   2770 ; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2771 ; AVX1:       # %bb.0:
   2772 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2773 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
   2774 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   2775 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
   2776 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   2777 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   2778 ; AVX1-NEXT:    retq
   2779 ;
   2780 ; AVX2-SLOW-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2781 ; AVX2-SLOW:       # %bb.0:
   2782 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2783 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7]
   2784 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
   2785 ; AVX2-SLOW-NEXT:    retq
   2786 ;
   2787 ; AVX2-FAST-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2788 ; AVX2-FAST:       # %bb.0:
   2789 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
   2790 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   2791 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
   2792 ; AVX2-FAST-NEXT:    retq
   2793 ;
   2794 ; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
   2795 ; AVX512VL:       # %bb.0:
   2796 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,6,7,5,1,6,4,11,14,14,15,13,9,14,12,11]
   2797 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2798 ; AVX512VL-NEXT:    retq
   2799   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
   2800   ret <16 x i16> %shuffle
   2801 }
   2802 
   2803 define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2804 ; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2805 ; AVX1:       # %bb.0:
   2806 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2807 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2808 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
   2809 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2810 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
   2811 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2812 ; AVX1-NEXT:    retq
   2813 ;
   2814 ; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2815 ; AVX2:       # %bb.0:
   2816 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2817 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   2818 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,24,25,24,25,24,25,24,25,24,25,24,25]
   2819 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2820 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2821 ; AVX2-NEXT:    retq
   2822 ;
   2823 ; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
   2824 ; AVX512VL:       # %bb.0:
   2825 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,4,4,4,4,4,12,8,8,12,12,12,12,12,12]
   2826 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2827 ; AVX512VL-NEXT:    retq
   2828   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2829   ret <16 x i16> %shuffle
   2830 }
   2831 
   2832 define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2833 ; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2834 ; AVX1:       # %bb.0:
   2835 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2836 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2837 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
   2838 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2839 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
   2840 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2841 ; AVX1-NEXT:    retq
   2842 ;
   2843 ; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2844 ; AVX2:       # %bb.0:
   2845 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2846 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   2847 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9,24,25,24,25,16,17,16,17,24,25,24,25,24,25,24,25]
   2848 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2849 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2850 ; AVX2-NEXT:    retq
   2851 ;
   2852 ; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
   2853 ; AVX512VL:       # %bb.0:
   2854 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,4,0,0,4,4,4,12,12,12,8,8,12,12,12,12]
   2855 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2856 ; AVX512VL-NEXT:    retq
   2857   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   2858   ret <16 x i16> %shuffle
   2859 }
   2860 
   2861 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2862 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2863 ; AVX1:       # %bb.0:
   2864 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2865 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2866 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2867 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2868 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2869 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2870 ; AVX1-NEXT:    retq
   2871 ;
   2872 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2873 ; AVX2:       # %bb.0:
   2874 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2875 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   2876 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9,16,17,24,25,24,25,16,17,24,25,24,25,24,25,24,25]
   2877 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2878 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2879 ; AVX2-NEXT:    retq
   2880 ;
   2881 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
   2882 ; AVX512VL:       # %bb.0:
   2883 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,4,4,12,8,12,12,8,12,12,12,12]
   2884 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2885 ; AVX512VL-NEXT:    retq
   2886   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   2887   ret <16 x i16> %shuffle
   2888 }
   2889 
   2890 define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
   2891 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2892 ; AVX1:       # %bb.0:
   2893 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2894 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
   2895 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
   2896 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2897 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
   2898 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2899 ; AVX1-NEXT:    retq
   2900 ;
   2901 ; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2902 ; AVX2:       # %bb.0:
   2903 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2904 ; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
   2905 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1,16,17,24,25,24,25,16,17,16,17,16,17,16,17,16,17]
   2906 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2907 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2908 ; AVX2-NEXT:    retq
   2909 ;
   2910 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
   2911 ; AVX512VL:       # %bb.0:
   2912 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,4,4,0,0,0,0,8,8,12,12,8,8,8,8,8]
   2913 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2914 ; AVX512VL-NEXT:    retq
   2915   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
   2916   ret <16 x i16> %shuffle
   2917 }
   2918 
   2919 define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
   2920 ; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2921 ; AVX1:       # %bb.0:
   2922 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2923 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
   2924 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
   2925 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   2926 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
   2927 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
   2928 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2929 ; AVX1-NEXT:    retq
   2930 ;
   2931 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2932 ; AVX2-SLOW:       # %bb.0:
   2933 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2934 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
   2935 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,2,2,0,4,5,6,7,8,10,10,8,12,13,14,15]
   2936 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2937 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2938 ; AVX2-SLOW-NEXT:    retq
   2939 ;
   2940 ; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2941 ; AVX2-FAST:       # %bb.0:
   2942 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15,16,17,24,25,24,25,16,17,24,25,26,27,28,29,30,31]
   2943 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   2944 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   2945 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   2946 ; AVX2-FAST-NEXT:    retq
   2947 ;
   2948 ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
   2949 ; AVX512VL:       # %bb.0:
   2950 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,4,4,0,4,5,6,15,8,12,12,8,12,13,14,15]
   2951 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2952 ; AVX512VL-NEXT:    retq
   2953   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
   2954   ret <16 x i16> %shuffle
   2955 }
   2956 
   2957 define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2958 ; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2959 ; AVX1:       # %bb.0:
   2960 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2961 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2962 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
   2963 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2964 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
   2965 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2966 ; AVX1-NEXT:    retq
   2967 ;
   2968 ; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2969 ; AVX2:       # %bb.0:
   2970 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2971 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   2972 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9,16,17,18,19,24,25,24,25,24,25,24,25,24,25,24,25]
   2973 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <255,255,u,u,255,255,255,255,255,255,255,255,255,255,0,0,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255>
   2974 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   2975 ; AVX2-NEXT:    retq
   2976 ;
   2977 ; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
   2978 ; AVX512VL:       # %bb.0:
   2979 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,4,4,4,4,4,12,8,u,12,12,12,12,12,12>
   2980 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   2981 ; AVX512VL-NEXT:    retq
   2982   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   2983   ret <16 x i16> %shuffle
   2984 }
   2985 
   2986 define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   2987 ; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2988 ; AVX1:       # %bb.0:
   2989 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2990 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   2991 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   2992 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   2993 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   2994 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2995 ; AVX1-NEXT:    retq
   2996 ;
   2997 ; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   2998 ; AVX2:       # %bb.0:
   2999 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3000 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   3001 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9,24,25,24,25,24,25,16,17,24,25,24,25,24,25,24,25]
   3002 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <255,255,255,255,u,u,255,255,255,255,255,255,255,255,0,0,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255>
   3003 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3004 ; AVX2-NEXT:    retq
   3005 ;
   3006 ; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
   3007 ; AVX512VL:       # %bb.0:
   3008 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = <4,4,u,0,4,4,4,12,12,12,u,8,12,12,12,12>
   3009 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   3010 ; AVX512VL-NEXT:    retq
   3011   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
   3012   ret <16 x i16> %shuffle
   3013 }
   3014 
   3015 define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
   3016 ; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   3017 ; AVX1:       # %bb.0:
   3018 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3019 ; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
   3020 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
   3021 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   3022 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
   3023 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3024 ; AVX1-NEXT:    retq
   3025 ;
   3026 ; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   3027 ; AVX2:       # %bb.0:
   3028 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3029 ; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm1
   3030 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9,16,17,24,25,24,25,16,17,24,25,24,25,24,25,24,25]
   3031 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,u,255,255,255,255,255,255,255,255,255,255,255,255,0,0,u,u,255,255,255,255,255,255,255,255,255,255,255,255,255,255>
   3032 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3033 ; AVX2-NEXT:    retq
   3034 ;
   3035 ; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
   3036 ; AVX512VL:       # %bb.0:
   3037 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,4,4,0,4,4,4,12,u,12,12,8,12,12,12,12>
   3038 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   3039 ; AVX512VL-NEXT:    retq
   3040   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   3041   ret <16 x i16> %shuffle
   3042 }
   3043 
   3044 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3045 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   3046 ; AVX1:       # %bb.0:
   3047 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3048 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
   3049 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   3050 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   3051 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3052 ; AVX1-NEXT:    retq
   3053 ;
   3054 ; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
   3055 ; AVX2OR512VL:       # %bb.0:
   3056 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
   3057 ; AVX2OR512VL-NEXT:    retq
   3058   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
   3059   ret <16 x i16> %shuffle
   3060 }
   3061 
   3062 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   3063 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   3064 ; AVX1:       # %bb.0:
   3065 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3066 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
   3067 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
   3068 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
   3069 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3070 ; AVX1-NEXT:    retq
   3071 ;
   3072 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   3073 ; AVX2:       # %bb.0:
   3074 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
   3075 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7,28,29,22,23,20,21,22,23,24,25,26,27,28,29,22,23]
   3076 ; AVX2-NEXT:    retq
   3077 ;
   3078 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
   3079 ; AVX512VL:       # %bb.0:
   3080 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,u,4,5,6,11,u,u,u,u,12,13,14,11>
   3081 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   3082 ; AVX512VL-NEXT:    retq
   3083   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
   3084   ret <16 x i16> %shuffle
   3085 }
   3086 
   3087 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3088 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   3089 ; AVX1:       # %bb.0:
   3090 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3091 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
   3092 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   3093 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   3094 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3095 ; AVX1-NEXT:    retq
   3096 ;
   3097 ; AVX2OR512VL-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
   3098 ; AVX2OR512VL:       # %bb.0:
   3099 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
   3100 ; AVX2OR512VL-NEXT:    retq
   3101   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   3102   ret <16 x i16> %shuffle
   3103 }
   3104 
   3105 define <16 x i16> @shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a) {
   3106 ; AVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
   3107 ; AVX1:       # %bb.0:
   3108 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
   3109 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   3110 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
   3111 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
   3112 ; AVX1-NEXT:    retq
   3113 ;
   3114 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
   3115 ; AVX2OR512VL:       # %bb.0:
   3116 ; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
   3117 ; AVX2OR512VL-NEXT:    retq
   3118   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 2, i32 16, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   3119   ret <16 x i16> %shuffle
   3120 }
   3121 
   3122 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
   3123 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   3124 ; AVX1:       # %bb.0:
   3125 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3126 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
   3127 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   3128 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   3129 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   3130 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3131 ; AVX1-NEXT:    retq
   3132 ;
   3133 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   3134 ; AVX2:       # %bb.0:
   3135 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3136 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3137 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3138 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7,16,17,18,19,20,21,30,31,24,25,26,27,28,29,22,23]
   3139 ; AVX2-NEXT:    retq
   3140 ;
   3141 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
   3142 ; AVX512VL:       # %bb.0:
   3143 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,11]
   3144 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   3145 ; AVX512VL-NEXT:    retq
   3146   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
   3147   ret <16 x i16> %shuffle
   3148 }
   3149 
   3150 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
   3151 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   3152 ; AVX1:       # %bb.0:
   3153 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3154 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
   3155 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   3156 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
   3157 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3158 ; AVX1-NEXT:    retq
   3159 ;
   3160 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   3161 ; AVX2:       # %bb.0:
   3162 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15,24,25,26,27,28,29,22,23,16,17,18,19,20,21,30,31]
   3163 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   3164 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3165 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   3166 ; AVX2-NEXT:    retq
   3167 ;
   3168 ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
   3169 ; AVX512VL:       # %bb.0:
   3170 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,5,6,3,0,1,2,15,12,13,14,11,8,9,10,15]
   3171 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   3172 ; AVX512VL-NEXT:    retq
   3173   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
   3174   ret <16 x i16> %shuffle
   3175 }
   3176 
   3177 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
   3178 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   3179 ; AVX1:       # %bb.0:
   3180 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3181 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
   3182 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
   3183 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
   3184 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   3185 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3186 ; AVX1-NEXT:    retq
   3187 ;
   3188 ; AVX2-SLOW-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   3189 ; AVX2-SLOW:       # %bb.0:
   3190 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3191 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
   3192 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
   3193 ; AVX2-SLOW-NEXT:    retq
   3194 ;
   3195 ; AVX2-FAST-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   3196 ; AVX2-FAST:       # %bb.0:
   3197 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
   3198 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   3199 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
   3200 ; AVX2-FAST-NEXT:    retq
   3201 ;
   3202 ; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
   3203 ; AVX512VL:       # %bb.0:
   3204 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,7,1,0,2,7,3,13,11,15,9,8,10,15,11,13]
   3205 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   3206 ; AVX512VL-NEXT:    retq
   3207   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
   3208   ret <16 x i16> %shuffle
   3209 }
   3210 
   3211 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
   3212 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   3213 ; AVX1:       # %bb.0:
   3214 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3215 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3216 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   3217 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   3218 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   3219 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   3220 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3221 ; AVX1-NEXT:    retq
   3222 ;
   3223 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   3224 ; AVX2:       # %bb.0:
   3225 ; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3226 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
   3227 ; AVX2-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
   3228 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
   3229 ; AVX2-NEXT:    retq
   3230 ;
   3231 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
   3232 ; AVX512VL:       # %bb.0:
   3233 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
   3234 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3235 ; AVX512VL-NEXT:    retq
   3236   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   3237   ret <16 x i16> %shuffle
   3238 }
   3239 
   3240 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
   3241 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   3242 ; AVX1:       # %bb.0:
   3243 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3244 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3245 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
   3246 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
   3247 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
   3248 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   3249 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   3250 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3251 ; AVX1-NEXT:    retq
   3252 ;
   3253 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   3254 ; AVX2:       # %bb.0:
   3255 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   3256 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
   3257 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   3258 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3259 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   3260 ; AVX2-NEXT:    retq
   3261 ;
   3262 ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
   3263 ; AVX512VL:       # %bb.0:
   3264 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31]
   3265 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3266 ; AVX512VL-NEXT:    retq
   3267   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
   3268   ret <16 x i16> %shuffle
   3269 }
   3270 
   3271 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   3272 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   3273 ; AVX1:       # %bb.0:
   3274 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3275 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3276 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   3277 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   3278 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   3279 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   3280 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3281 ; AVX1-NEXT:    retq
   3282 ;
   3283 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   3284 ; AVX2:       # %bb.0:
   3285 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
   3286 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
   3287 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
   3288 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6,7]
   3289 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   3290 ; AVX2-NEXT:    retq
   3291 ;
   3292 ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
   3293 ; AVX512VL:       # %bb.0:
   3294 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
   3295 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3296 ; AVX512VL-NEXT:    retq
   3297   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   3298   ret <16 x i16> %shuffle
   3299 }
   3300 
   3301 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
   3302 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   3303 ; AVX1:       # %bb.0:
   3304 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3305 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3306 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   3307 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   3308 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   3309 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   3310 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   3311 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3312 ; AVX1-NEXT:    retq
   3313 ;
   3314 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   3315 ; AVX2:       # %bb.0:
   3316 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   3317 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3318 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3319 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3320 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
   3321 ; AVX2-NEXT:    retq
   3322 ;
   3323 ; AVX512VL-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
   3324 ; AVX512VL:       # %bb.0:
   3325 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27]
   3326 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3327 ; AVX512VL-NEXT:    retq
   3328   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
   3329   ret <16 x i16> %shuffle
   3330 }
   3331 
   3332 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
   3333 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   3334 ; AVX1:       # %bb.0:
   3335 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3336 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
   3337 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   3338 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   3339 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   3340 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
   3341 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
   3342 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
   3343 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
   3344 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   3345 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   3346 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3347 ; AVX1-NEXT:    retq
   3348 ;
   3349 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   3350 ; AVX2-SLOW:       # %bb.0:
   3351 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3352 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[0,0,2,1,4,5,6,7,8,8,10,9,12,13,14,15]
   3353 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,6,6,7,8,9,10,11,12,14,14,15]
   3354 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7]
   3355 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   3356 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   3357 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   3358 ; AVX2-SLOW-NEXT:    retq
   3359 ;
   3360 ; AVX2-FAST-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   3361 ; AVX2-FAST:       # %bb.0:
   3362 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3363 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,2,3,8,9,12,13,12,13,14,15,16,17,16,17,20,21,18,19,24,25,28,29,28,29,30,31]
   3364 ; AVX2-FAST-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7]
   3365 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,2,3,6,7,12,13,10,11,14,15,14,15,16,17,18,19,18,19,22,23,28,29,26,27,30,31,30,31]
   3366 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   3367 ; AVX2-FAST-NEXT:    retq
   3368 ;
   3369 ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
   3370 ; AVX512VL:       # %bb.0:
   3371 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31]
   3372 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3373 ; AVX512VL-NEXT:    retq
   3374   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
   3375   ret <16 x i16> %shuffle
   3376 }
   3377 
   3378 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
   3379 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   3380 ; AVX1:       # %bb.0:
   3381 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3382 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
   3383 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   3384 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
   3385 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
   3386 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
   3387 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
   3388 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
   3389 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   3390 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3391 ; AVX1-NEXT:    retq
   3392 ;
   3393 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   3394 ; AVX2-SLOW:       # %bb.0:
   3395 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3396 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,0,0,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u>
   3397 ; AVX2-SLOW-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
   3398 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3,24,25,24,25,20,21,26,27,16,17,16,17,28,29,18,19]
   3399 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   3400 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
   3401 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   3402 ; AVX2-SLOW-NEXT:    retq
   3403 ;
   3404 ; AVX2-FAST-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   3405 ; AVX2-FAST:       # %bb.0:
   3406 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3407 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,0,0,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u,255,255,255,255,u,u,u,u>
   3408 ; AVX2-FAST-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
   3409 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3,24,25,24,25,20,21,26,27,16,17,16,17,28,29,18,19]
   3410 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,2,3,6,7,12,13,10,11,14,15,14,15,16,17,18,19,18,19,22,23,28,29,26,27,30,31,30,31]
   3411 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   3412 ; AVX2-FAST-NEXT:    retq
   3413 ;
   3414 ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
   3415 ; AVX512VL:       # %bb.0:
   3416 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25]
   3417 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3418 ; AVX512VL-NEXT:    retq
   3419   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
   3420   ret <16 x i16> %shuffle
   3421 }
   3422 
   3423 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
   3424 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   3425 ; AVX1:       # %bb.0:
   3426 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3427 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   3428 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
   3429 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
   3430 ; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
   3431 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3432 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   3433 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
   3434 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
   3435 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3436 ; AVX1-NEXT:    retq
   3437 ;
   3438 ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   3439 ; AVX2-SLOW:       # %bb.0:
   3440 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3441 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,0,0,255,255,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
   3442 ; AVX2-SLOW-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
   3443 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
   3444 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
   3445 ; AVX2-SLOW-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   3446 ; AVX2-SLOW-NEXT:    retq
   3447 ;
   3448 ; AVX2-FAST-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   3449 ; AVX2-FAST:       # %bb.0:
   3450 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
   3451 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,0,0,255,255,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
   3452 ; AVX2-FAST-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
   3453 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[2,3,0,1,2,3,0,1,8,9,10,11,6,7,4,5,18,19,16,17,18,19,16,17,24,25,26,27,22,23,20,21]
   3454 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,12,13,14,15,18,19,16,17,22,23,20,21,22,23,20,21,28,29,30,31]
   3455 ; AVX2-FAST-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   3456 ; AVX2-FAST-NEXT:    retq
   3457 ;
   3458 ; AVX512VL-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
   3459 ; AVX512VL:       # %bb.0:
   3460 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26]
   3461 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3462 ; AVX512VL-NEXT:    retq
   3463   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
   3464   ret <16 x i16> %shuffle
   3465 }
   3466 
   3467 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
   3468 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   3469 ; AVX1:       # %bb.0:
   3470 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3471 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3472 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   3473 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   3474 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
   3475 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   3476 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3477 ; AVX1-NEXT:    retq
   3478 ;
   3479 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   3480 ; AVX2:       # %bb.0:
   3481 ; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
   3482 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,0,0,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,u,u,u,u,u,u,u,u>
   3483 ; AVX2-NEXT:    vpblendvb %ymm3, %ymm0, %ymm2, %ymm0
   3484 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
   3485 ; AVX2-NEXT:    retq
   3486 ;
   3487 ; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
   3488 ; AVX512VL:       # %bb.0:
   3489 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
   3490 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   3491 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   3492 ; AVX512VL-NEXT:    retq
   3493   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
   3494   ret <16 x i16> %shuffle
   3495 }
   3496 
   3497 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
   3498 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   3499 ; AVX1:       # %bb.0:
   3500 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3501 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3502 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   3503 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   3504 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
   3505 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   3506 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3507 ; AVX1-NEXT:    retq
   3508 ;
   3509 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   3510 ; AVX2:       # %bb.0:
   3511 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
   3512 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
   3513 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   3514 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6,7]
   3515 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
   3516 ; AVX2-NEXT:    retq
   3517 ;
   3518 ; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
   3519 ; AVX512VL:       # %bb.0:
   3520 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
   3521 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   3522 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   3523 ; AVX512VL-NEXT:    retq
   3524   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
   3525   ret <16 x i16> %shuffle
   3526 }
   3527 
   3528 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
   3529 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   3530 ; AVX1:       # %bb.0:
   3531 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3532 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,6,5,7]
   3533 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   3534 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
   3535 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
   3536 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   3537 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
   3538 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
   3539 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   3540 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
   3541 ; AVX1-NEXT:    retq
   3542 ;
   3543 ; AVX2-SLOW-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   3544 ; AVX2-SLOW:       # %bb.0:
   3545 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   3546 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3547 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
   3548 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
   3549 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3550 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3551 ; AVX2-SLOW-NEXT:    retq
   3552 ;
   3553 ; AVX2-FAST-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   3554 ; AVX2-FAST:       # %bb.0:
   3555 ; AVX2-FAST-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   3556 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15,16,17,20,21,18,19,22,23,24,25,28,29,26,27,30,31]
   3557 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   3558 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3559 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   3560 ; AVX2-FAST-NEXT:    retq
   3561 ;
   3562 ; AVX512VL-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
   3563 ; AVX512VL:       # %bb.0:
   3564 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31]
   3565 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3566 ; AVX512VL-NEXT:    retq
   3567   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
   3568   ret <16 x i16> %shuffle
   3569 }
   3570 
   3571 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3572 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   3573 ; AVX1:       # %bb.0:
   3574 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3575 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
   3576 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
   3577 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   3578 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3579 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   3580 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
   3581 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
   3582 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   3583 ; AVX1-NEXT:    retq
   3584 ;
   3585 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   3586 ; AVX2-SLOW:       # %bb.0:
   3587 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
   3588 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
   3589 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
   3590 ; AVX2-SLOW-NEXT:    retq
   3591 ;
   3592 ; AVX2-FAST-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   3593 ; AVX2-FAST:       # %bb.0:
   3594 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
   3595 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15,24,25,24,25,22,23,20,21,24,25,26,27,28,29,30,31]
   3596 ; AVX2-FAST-NEXT:    retq
   3597 ;
   3598 ; AVX512VL-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
   3599 ; AVX512VL:       # %bb.0:
   3600 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u>
   3601 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3602 ; AVX512VL-NEXT:    retq
   3603   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
   3604   ret <16 x i16> %shuffle
   3605 }
   3606 
   3607 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3608 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   3609 ; AVX1:       # %bb.0:
   3610 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3611 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3612 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
   3613 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
   3614 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   3615 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   3616 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
   3617 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3618 ; AVX1-NEXT:    retq
   3619 ;
   3620 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   3621 ; AVX2:       # %bb.0:
   3622 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   3623 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
   3624 ; AVX2-NEXT:    retq
   3625 ;
   3626 ; AVX512VL-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
   3627 ; AVX512VL:       # %bb.0:
   3628 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u>
   3629 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3630 ; AVX512VL-NEXT:    retq
   3631   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   3632   ret <16 x i16> %shuffle
   3633 }
   3634 
   3635 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3636 ; ALL-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
   3637 ; ALL:       # %bb.0:
   3638 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
   3639 ; ALL-NEXT:    retq
   3640   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   3641   ret <16 x i16> %shuffle
   3642 }
   3643 
   3644 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3645 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   3646 ; AVX1:       # %bb.0:
   3647 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3648 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3649 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
   3650 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
   3651 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
   3652 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
   3653 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3654 ; AVX1-NEXT:    retq
   3655 ;
   3656 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   3657 ; AVX2:       # %bb.0:
   3658 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
   3659 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
   3660 ; AVX2-NEXT:    retq
   3661 ;
   3662 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
   3663 ; AVX512VL:       # %bb.0:
   3664 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u>
   3665 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3666 ; AVX512VL-NEXT:    retq
   3667   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
   3668   ret <16 x i16> %shuffle
   3669 }
   3670 
   3671 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
   3672 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   3673 ; AVX1:       # %bb.0:
   3674 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3675 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3676 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   3677 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
   3678 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
   3679 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3680 ; AVX1-NEXT:    retq
   3681 ;
   3682 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   3683 ; AVX2:       # %bb.0:
   3684 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
   3685 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
   3686 ; AVX2-NEXT:    retq
   3687 ;
   3688 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
   3689 ; AVX512VL:       # %bb.0:
   3690 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,u,u,u,4,5,6,27,u,u,u,u,12,13,14,27>
   3691 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   3692 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   3693 ; AVX512VL-NEXT:    retq
   3694   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
   3695   ret <16 x i16> %shuffle
   3696 }
   3697 
   3698 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   3699 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   3700 ; AVX1:       # %bb.0:
   3701 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3702 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   3703 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
   3704 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
   3705 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
   3706 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
   3707 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3708 ; AVX1-NEXT:    retq
   3709 ;
   3710 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   3711 ; AVX2:       # %bb.0:
   3712 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
   3713 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
   3714 ; AVX2-NEXT:    retq
   3715 ;
   3716 ; AVX512VL-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
   3717 ; AVX512VL:       # %bb.0:
   3718 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u>
   3719 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   3720 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   3721 ; AVX512VL-NEXT:    retq
   3722   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   3723   ret <16 x i16> %shuffle
   3724 }
   3725 
   3726 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
   3727 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   3728 ; AVX1:       # %bb.0:
   3729 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3730 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
   3731 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
   3732 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
   3733 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   3734 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
   3735 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
   3736 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3737 ; AVX1-NEXT:    retq
   3738 ;
   3739 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   3740 ; AVX2:       # %bb.0:
   3741 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   3742 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3743 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <255,255,255,255,255,255,0,0,255,255,255,255,255,255,u,u,255,255,255,255,255,255,255,255,255,255,255,255,255,255,u,u>
   3744 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3745 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7,16,17,18,19,20,21,26,27,24,25,26,27,28,29,22,23]
   3746 ; AVX2-NEXT:    retq
   3747 ;
   3748 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
   3749 ; AVX512VL:       # %bb.0:
   3750 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11]
   3751 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3752 ; AVX512VL-NEXT:    retq
   3753   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
   3754   ret <16 x i16> %shuffle
   3755 }
   3756 
   3757 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
   3758 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   3759 ; AVX1:       # %bb.0:
   3760 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3761 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3762 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
   3763 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   3764 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
   3765 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3766 ; AVX1-NEXT:    retq
   3767 ;
   3768 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   3769 ; AVX2:       # %bb.0:
   3770 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
   3771 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
   3772 ; AVX2-NEXT:    retq
   3773 ;
   3774 ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
   3775 ; AVX512VL:       # %bb.0:
   3776 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15]
   3777 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3778 ; AVX512VL-NEXT:    retq
   3779   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
   3780   ret <16 x i16> %shuffle
   3781 }
   3782 
   3783 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
   3784 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   3785 ; AVX1:       # %bb.0:
   3786 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   3787 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
   3788 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   3789 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
   3790 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
   3791 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
   3792 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   3793 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   3794 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   3795 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3796 ; AVX1-NEXT:    retq
   3797 ;
   3798 ; AVX2-SLOW-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   3799 ; AVX2-SLOW:       # %bb.0:
   3800 ; AVX2-SLOW-NEXT:    vextracti128 $1, %ymm1, %xmm1
   3801 ; AVX2-SLOW-NEXT:    vpbroadcastd %xmm1, %ymm1
   3802 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
   3803 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
   3804 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
   3805 ; AVX2-SLOW-NEXT:    retq
   3806 ;
   3807 ; AVX2-FAST-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   3808 ; AVX2-FAST:       # %bb.0:
   3809 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15,16,17,18,19,20,21,18,19,24,25,26,27,30,31,30,31]
   3810 ; AVX2-FAST-NEXT:    vextracti128 $1, %ymm1, %xmm1
   3811 ; AVX2-FAST-NEXT:    vpbroadcastd %xmm1, %ymm1
   3812 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
   3813 ; AVX2-FAST-NEXT:    retq
   3814 ;
   3815 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
   3816 ; AVX512VL:       # %bb.0:
   3817 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,u,u,1,u,5,7,25,u,u,u,9,u,13,15,25>
   3818 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3819 ; AVX512VL-NEXT:    retq
   3820   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
   3821   ret <16 x i16> %shuffle
   3822 }
   3823 
   3824 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
   3825 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   3826 ; AVX1:       # %bb.0:
   3827 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3828 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
   3829 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
   3830 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   3831 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
   3832 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
   3833 ; AVX1-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
   3834 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   3835 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
   3836 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3837 ; AVX1-NEXT:    retq
   3838 ;
   3839 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   3840 ; AVX2:       # %bb.0:
   3841 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
   3842 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
   3843 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
   3844 ; AVX2-NEXT:    retq
   3845 ;
   3846 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
   3847 ; AVX512VL:       # %bb.0:
   3848 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,u,20,u,0,2,4,u,u,u,28,u,8,10,12,u>
   3849 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   3850 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   3851 ; AVX512VL-NEXT:    retq
   3852   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
   3853   ret <16 x i16> %shuffle
   3854 }
   3855 
   3856 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
   3857 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   3858 ; AVX1:       # %bb.0:
   3859 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3860 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3861 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3862 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
   3863 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   3864 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3865 ; AVX1-NEXT:    retq
   3866 ;
   3867 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   3868 ; AVX2:       # %bb.0:
   3869 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
   3870 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3871 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3872 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3873 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
   3874 ; AVX2-NEXT:    retq
   3875 ;
   3876 ; AVX512VL-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
   3877 ; AVX512VL:       # %bb.0:
   3878 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
   3879 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   3880 ; AVX512VL-NEXT:    retq
   3881   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
   3882   ret <16 x i16> %shuffle
   3883 }
   3884 
   3885 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3886 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   3887 ; AVX1:       # %bb.0:
   3888 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3889 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3890 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   3891 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
   3892 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3893 ; AVX1-NEXT:    retq
   3894 ;
   3895 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
   3896 ; AVX2OR512VL:       # %bb.0:
   3897 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
   3898 ; AVX2OR512VL-NEXT:    retq
   3899   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3900   ret <16 x i16> %shuffle
   3901 }
   3902 
   3903 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
   3904 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   3905 ; AVX1:       # %bb.0:
   3906 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   3907 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
   3908 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3909 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3910 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   3911 ; AVX1-NEXT:    retq
   3912 ;
   3913 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   3914 ; AVX2:       # %bb.0:
   3915 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3916 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3917 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3918 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
   3919 ; AVX2-NEXT:    retq
   3920 ;
   3921 ; AVX512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
   3922 ; AVX512VL:       # %bb.0:
   3923 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [5,6,7,0,1,2,3,12,13,14,15,8,9,10,11,12]
   3924 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   3925 ; AVX512VL-NEXT:    retq
   3926   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
   3927   ret <16 x i16> %shuffle
   3928 }
   3929 
   3930 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3931 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3932 ; AVX1:       # %bb.0:
   3933 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3934 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3935 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
   3936 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3937 ; AVX1-NEXT:    retq
   3938 ;
   3939 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
   3940 ; AVX2OR512VL:       # %bb.0:
   3941 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
   3942 ; AVX2OR512VL-NEXT:    retq
   3943   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3944   ret <16 x i16> %shuffle
   3945 }
   3946 
   3947 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
   3948 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3949 ; AVX1:       # %bb.0:
   3950 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3951 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   3952 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
   3953 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   3954 ; AVX1-NEXT:    retq
   3955 ;
   3956 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
   3957 ; AVX2OR512VL:       # %bb.0:
   3958 ; AVX2OR512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
   3959 ; AVX2OR512VL-NEXT:    retq
   3960   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
   3961   ret <16 x i16> %shuffle
   3962 }
   3963 
   3964 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
   3965 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3966 ; AVX1:       # %bb.0:
   3967 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   3968 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   3969 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   3970 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
   3971 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   3972 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   3973 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
   3974 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   3975 ; AVX1-NEXT:    retq
   3976 ;
   3977 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3978 ; AVX2:       # %bb.0:
   3979 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
   3980 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   3981 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   3982 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   3983 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
   3984 ; AVX2-NEXT:    retq
   3985 ;
   3986 ; AVX512VL-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
   3987 ; AVX512VL:       # %bb.0:
   3988 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
   3989 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   3990 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   3991 ; AVX512VL-NEXT:    retq
   3992   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
   3993   ret <16 x i16> %shuffle
   3994 }
   3995 
   3996 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
   3997 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   3998 ; AVX1:       # %bb.0:
   3999 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   4000 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   4001 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   4002 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
   4003 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4004 ; AVX1-NEXT:    retq
   4005 ;
   4006 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
   4007 ; AVX2OR512VL:       # %bb.0:
   4008 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
   4009 ; AVX2OR512VL-NEXT:    retq
   4010   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
   4011   ret <16 x i16> %shuffle
   4012 }
   4013 
   4014 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
   4015 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   4016 ; AVX1:       # %bb.0:
   4017 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   4018 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
   4019 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   4020 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   4021 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   4022 ; AVX1-NEXT:    retq
   4023 ;
   4024 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   4025 ; AVX2:       # %bb.0:
   4026 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   4027 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   4028 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   4029 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
   4030 ; AVX2-NEXT:    retq
   4031 ;
   4032 ; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
   4033 ; AVX512VL:       # %bb.0:
   4034 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,4,5,6,7,0,1,10,11,12,13,14,15,8,9,10]
   4035 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
   4036 ; AVX512VL-NEXT:    retq
   4037   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
   4038   ret <16 x i16> %shuffle
   4039 }
   4040 
   4041 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
   4042 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   4043 ; AVX1:       # %bb.0:
   4044 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   4045 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   4046 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
   4047 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   4048 ; AVX1-NEXT:    retq
   4049 ;
   4050 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
   4051 ; AVX2OR512VL:       # %bb.0:
   4052 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
   4053 ; AVX2OR512VL-NEXT:    retq
   4054   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
   4055   ret <16 x i16> %shuffle
   4056 }
   4057 
   4058 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
   4059 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   4060 ; AVX1:       # %bb.0:
   4061 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   4062 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   4063 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   4064 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   4065 ; AVX1-NEXT:    retq
   4066 ;
   4067 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
   4068 ; AVX2OR512VL:       # %bb.0:
   4069 ; AVX2OR512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
   4070 ; AVX2OR512VL-NEXT:    retq
   4071   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
   4072   ret <16 x i16> %shuffle
   4073 }
   4074 
   4075 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
   4076 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   4077 ; AVX1:       # %bb.0:
   4078 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   4079 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   4080 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   4081 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
   4082 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
   4083 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   4084 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
   4085 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4086 ; AVX1-NEXT:    retq
   4087 ;
   4088 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   4089 ; AVX2:       # %bb.0:
   4090 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
   4091 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   4092 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   4093 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   4094 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
   4095 ; AVX2-NEXT:    retq
   4096 ;
   4097 ; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
   4098 ; AVX512VL:       # %bb.0:
   4099 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
   4100 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   4101 ; AVX512VL-NEXT:    retq
   4102   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
   4103   ret <16 x i16> %shuffle
   4104 }
   4105 
   4106 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
   4107 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   4108 ; AVX1:       # %bb.0:
   4109 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   4110 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   4111 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
   4112 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
   4113 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4114 ; AVX1-NEXT:    retq
   4115 ;
   4116 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
   4117 ; AVX2OR512VL:       # %bb.0:
   4118 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
   4119 ; AVX2OR512VL-NEXT:    retq
   4120   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
   4121   ret <16 x i16> %shuffle
   4122 }
   4123 
   4124 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
   4125 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   4126 ; AVX1:       # %bb.0:
   4127 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   4128 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   4129 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   4130 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
   4131 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   4132 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4133 ; AVX1-NEXT:    retq
   4134 ;
   4135 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   4136 ; AVX2:       # %bb.0:
   4137 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
   4138 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
   4139 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   4140 ; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
   4141 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
   4142 ; AVX2-NEXT:    retq
   4143 ;
   4144 ; AVX512VL-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
   4145 ; AVX512VL:       # %bb.0:
   4146 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
   4147 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   4148 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   4149 ; AVX512VL-NEXT:    retq
   4150   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
   4151   ret <16 x i16> %shuffle
   4152 }
   4153 
   4154 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
   4155 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   4156 ; AVX1:       # %bb.0:
   4157 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   4158 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   4159 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
   4160 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
   4161 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4162 ; AVX1-NEXT:    retq
   4163 ;
   4164 ; AVX2OR512VL-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
   4165 ; AVX2OR512VL:       # %bb.0:
   4166 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
   4167 ; AVX2OR512VL-NEXT:    retq
   4168   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
   4169   ret <16 x i16> %shuffle
   4170 }
   4171 
   4172 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
   4173 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   4174 ; AVX1:       # %bb.0:
   4175 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   4176 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   4177 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   4178 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
   4179 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
   4180 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   4181 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
   4182 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
   4183 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4184 ; AVX1-NEXT:    retq
   4185 ;
   4186 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   4187 ; AVX2:       # %bb.0:
   4188 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
   4189 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
   4190 ; AVX2-NEXT:    retq
   4191 ;
   4192 ; AVX512VL-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
   4193 ; AVX512VL:       # %bb.0:
   4194 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u>
   4195 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   4196 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   4197 ; AVX512VL-NEXT:    retq
   4198   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
   4199   ret <16 x i16> %shuffle
   4200 }
   4201 
   4202 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) {
   4203 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
   4204 ; AVX1:       # %bb.0:
   4205 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   4206 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   4207 ; AVX1-NEXT:    retq
   4208 ;
   4209 ; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
   4210 ; AVX2OR512VL:       # %bb.0:
   4211 ; AVX2OR512VL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   4212 ; AVX2OR512VL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   4213 ; AVX2OR512VL-NEXT:    retq
   4214   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
   4215   ret <16 x i16> %shuffle
   4216 }
   4217 
   4218 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
   4219 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   4220 ; AVX1:       # %bb.0:
   4221 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
   4222 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   4223 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   4224 ; AVX1-NEXT:    retq
   4225 ;
   4226 ; AVX2-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   4227 ; AVX2-SLOW:       # %bb.0:
   4228 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
   4229 ; AVX2-SLOW-NEXT:    vpbroadcastd %xmm0, %xmm0
   4230 ; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   4231 ; AVX2-SLOW-NEXT:    retq
   4232 ;
   4233 ; AVX2-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   4234 ; AVX2-FAST:       # %bb.0:
   4235 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
   4236 ; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   4237 ; AVX2-FAST-NEXT:    retq
   4238 ;
   4239 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   4240 ; AVX512VL-SLOW:       # %bb.0:
   4241 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
   4242 ; AVX512VL-SLOW-NEXT:    vpbroadcastd %xmm0, %xmm0
   4243 ; AVX512VL-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   4244 ; AVX512VL-SLOW-NEXT:    retq
   4245 ;
   4246 ; AVX512VL-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
   4247 ; AVX512VL-FAST:       # %bb.0:
   4248 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
   4249 ; AVX512VL-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   4250 ; AVX512VL-FAST-NEXT:    retq
   4251   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   4252   ret <16 x i16> %shuffle
   4253 }
   4254 
   4255 define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) {
   4256 ; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
   4257 ; AVX1:       # %bb.0:
   4258 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   4259 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   4260 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   4261 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   4262 ; AVX1-NEXT:    retq
   4263 ;
   4264 ; AVX2OR512VL-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
   4265 ; AVX2OR512VL:       # %bb.0:
   4266 ; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm0, %xmm0
   4267 ; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %ymm0
   4268 ; AVX2OR512VL-NEXT:    retq
   4269   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   4270   ret <16 x i16> %shuffle
   4271 }
   4272 
   4273 define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
   4274 ; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u:
   4275 ; ALL:       # %bb.0:
   4276 ; ALL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   4277 ; ALL-NEXT:    retq
   4278   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   4279   ret <16 x i16> %shuffle
   4280 }
   4281 
   4282 define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
   4283 ; AVX1-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
   4284 ; AVX1:       # %bb.0:
   4285 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
   4286 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   4287 ; AVX1-NEXT:    retq
   4288 ;
   4289 ; AVX2-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
   4290 ; AVX2-SLOW:       # %bb.0:
   4291 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
   4292 ; AVX2-SLOW-NEXT:    vpbroadcastd %xmm0, %xmm0
   4293 ; AVX2-SLOW-NEXT:    retq
   4294 ;
   4295 ; AVX2-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
   4296 ; AVX2-FAST:       # %bb.0:
   4297 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
   4298 ; AVX2-FAST-NEXT:    retq
   4299 ;
   4300 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
   4301 ; AVX512VL-SLOW:       # %bb.0:
   4302 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
   4303 ; AVX512VL-SLOW-NEXT:    vpbroadcastd %xmm0, %xmm0
   4304 ; AVX512VL-SLOW-NEXT:    retq
   4305 ;
   4306 ; AVX512VL-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
   4307 ; AVX512VL-FAST:       # %bb.0:
   4308 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
   4309 ; AVX512VL-FAST-NEXT:    retq
   4310   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   4311   ret <16 x i16> %shuffle
   4312 }
   4313 
   4314 define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
   4315 ; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   4316 ; AVX1:       # %bb.0:
   4317 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   4318 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   4319 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   4320 ; AVX1-NEXT:    retq
   4321 ;
   4322 ; AVX2-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   4323 ; AVX2-SLOW:       # %bb.0:
   4324 ; AVX2-SLOW-NEXT:    vextracti128 $1, %ymm0, %xmm0
   4325 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   4326 ; AVX2-SLOW-NEXT:    vpbroadcastd %xmm0, %xmm0
   4327 ; AVX2-SLOW-NEXT:    retq
   4328 ;
   4329 ; AVX2-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   4330 ; AVX2-FAST:       # %bb.0:
   4331 ; AVX2-FAST-NEXT:    vextracti128 $1, %ymm0, %xmm0
   4332 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   4333 ; AVX2-FAST-NEXT:    retq
   4334 ;
   4335 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   4336 ; AVX512VL-SLOW:       # %bb.0:
   4337 ; AVX512VL-SLOW-NEXT:    vextracti128 $1, %ymm0, %xmm0
   4338 ; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   4339 ; AVX512VL-SLOW-NEXT:    vpbroadcastd %xmm0, %xmm0
   4340 ; AVX512VL-SLOW-NEXT:    retq
   4341 ;
   4342 ; AVX512VL-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
   4343 ; AVX512VL-FAST:       # %bb.0:
   4344 ; AVX512VL-FAST-NEXT:    vextracti128 $1, %ymm0, %xmm0
   4345 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   4346 ; AVX512VL-FAST-NEXT:    retq
   4347   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   4348   ret <16 x i16> %shuffle
   4349 }
   4350 
   4351 define <16 x i16> @shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) {
   4352 ; AVX1-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
   4353 ; AVX1:       # %bb.0:
   4354 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   4355 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
   4356 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
   4357 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   4358 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   4359 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   4360 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   4361 ; AVX1-NEXT:    retq
   4362 ;
   4363 ; AVX2-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
   4364 ; AVX2:       # %bb.0:
   4365 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,0,1,12,13,2,3,16,17,20,21,20,21,22,23,16,17,16,17,28,29,18,19]
   4366 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,5,2,3,6,7,6,7,0,1,2,3,2,3,14,15,20,21,18,19,22,23,22,23,16,17,18,19,18,19,30,31]
   4367 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   4368 ; AVX2-NEXT:    retq
   4369 ;
   4370 ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
   4371 ; AVX512VL:       # %bb.0:
   4372 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
   4373 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   4374 ; AVX512VL-NEXT:    retq
   4375   %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
   4376   ret <16 x i16> %1
   4377 }
   4378 
   4379 define <16 x i16> @shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25(<16 x i16> %a0, <16 x i16> %a1) {
   4380 ; AVX1-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
   4381 ; AVX1:       # %bb.0:
   4382 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   4383 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   4384 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
   4385 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,2,4,5,6,7]
   4386 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   4387 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
   4388 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[0,1,0,1]
   4389 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,7,7]
   4390 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
   4391 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
   4392 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
   4393 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
   4394 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
   4395 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
   4396 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4397 ; AVX1-NEXT:    retq
   4398 ;
   4399 ; AVX2-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
   4400 ; AVX2:       # %bb.0:
   4401 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,0,1,12,13,2,3,16,17,20,21,20,21,22,23,16,17,16,17,28,29,18,19]
   4402 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,5,2,3,6,7,6,7,0,1,2,3,2,3,14,15,20,21,18,19,22,23,22,23,16,17,18,19,18,19,30,31]
   4403 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
   4404 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
   4405 ; AVX2-NEXT:    retq
   4406 ;
   4407 ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
   4408 ; AVX512VL:       # %bb.0:
   4409 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [2,18,3,19,10,26,11,27,0,16,1,17,8,24,9,25]
   4410 ; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
   4411 ; AVX512VL-NEXT:    retq
   4412   %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
   4413   %2 = bitcast <16 x i16> %1 to <4 x i64>
   4414   %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
   4415   %4 = bitcast <4 x i64> %3 to <16 x i16>
   4416   ret <16 x i16> %4
   4417 }
   4418 
   4419 define <16 x i16> @shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13(<16 x i16> %a) {
   4420 ; AVX1-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
   4421 ; AVX1:       # %bb.0:
   4422 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,6,7,5]
   4423 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
   4424 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   4425 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
   4426 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
   4427 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   4428 ; AVX1-NEXT:    retq
   4429 ;
   4430 ; AVX2-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
   4431 ; AVX2-SLOW:       # %bb.0:
   4432 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
   4433 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
   4434 ; AVX2-SLOW-NEXT:    retq
   4435 ;
   4436 ; AVX2-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
   4437 ; AVX2-FAST:       # %bb.0:
   4438 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27]
   4439 ; AVX2-FAST-NEXT:    retq
   4440 ;
   4441 ; AVX512VL-SLOW-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
   4442 ; AVX512VL-SLOW:       # %bb.0:
   4443 ; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,5,8,9,10,11,12,14,15,13]
   4444 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
   4445 ; AVX512VL-SLOW-NEXT:    retq
   4446 ;
   4447 ; AVX512VL-FAST-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13:
   4448 ; AVX512VL-FAST:       # %bb.0:
   4449 ; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11,24,25,28,29,30,31,26,27,24,25,28,29,30,31,26,27]
   4450 ; AVX512VL-FAST-NEXT:    retq
   4451   %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5, i32 12, i32 14, i32 15, i32 undef, i32 undef, i32 14, i32 15, i32 13>
   4452   ret <16 x i16> %shuffle
   4453 }
   4454 
   4455 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
   4456 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
   4457 ; ALL:       # %bb.0:
   4458 ; ALL-NEXT:    movzwl (%rdi), %eax
   4459 ; ALL-NEXT:    vmovd %eax, %xmm0
   4460 ; ALL-NEXT:    retq
   4461   %val = load i16, i16* %ptr
   4462   %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
   4463   ret <16 x i16> %i0
   4464 }
   4465 
   4466 define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
   4467 ; ALL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
   4468 ; ALL:       # %bb.0:
   4469 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   4470 ; ALL-NEXT:    retq
   4471   %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   4472   %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   4473   %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   4474   ret <16 x i16> %shuf
   4475 }
   4476 
   4477 define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
   4478 ; AVX1-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
   4479 ; AVX1:       # %bb.0:
   4480 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   4481 ; AVX1-NEXT:    retq
   4482 ;
   4483 ; AVX2-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
   4484 ; AVX2:       # %bb.0:
   4485 ; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   4486 ; AVX2-NEXT:    retq
   4487 ;
   4488 ; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
   4489 ; AVX512VL:       # %bb.0:
   4490 ; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   4491 ; AVX512VL-NEXT:    retq
   4492   %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   4493   %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   4494   %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
   4495   %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
   4496   %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   4497   %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
   4498   ret <16 x i16> %shuffle16
   4499 }
   4500 
   4501 define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) {
   4502 ; AVX1-LABEL: PR24935:
   4503 ; AVX1:       # %bb.0:
   4504 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
   4505 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
   4506 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm4 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
   4507 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5,6,7]
   4508 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   4509 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm5 = xmm4[0,1,2,3,5,5,6,7]
   4510 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm6 = xmm0[2,3,2,3,4,5,6,7,8,9,8,9,0,1,2,3]
   4511 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6,7]
   4512 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3],xmm5[4,5,6],xmm2[7]
   4513 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
   4514 ; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
   4515 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
   4516 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[6,7,4,5,4,5,10,11,4,5,14,15,12,13,0,1]
   4517 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7]
   4518 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4519 ; AVX1-NEXT:    retq
   4520 ;
   4521 ; AVX2-SLOW-LABEL: PR24935:
   4522 ; AVX2-SLOW:       # %bb.0:
   4523 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm2 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17]
   4524 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
   4525 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23]
   4526 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0>
   4527 ; AVX2-SLOW-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
   4528 ; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} ymm2 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
   4529 ; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,6,7,8,9,10,11,13,13,14,15]
   4530 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   4531 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
   4532 ; AVX2-SLOW-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
   4533 ; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
   4534 ; AVX2-SLOW-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   4535 ; AVX2-SLOW-NEXT:    retq
   4536 ;
   4537 ; AVX2-FAST-LABEL: PR24935:
   4538 ; AVX2-FAST:       # %bb.0:
   4539 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm2 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17]
   4540 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
   4541 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23]
   4542 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0>
   4543 ; AVX2-FAST-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
   4544 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[0,1,2,3,2,3,6,7,10,11,10,11,12,13,14,15,16,17,18,19,18,19,22,23,26,27,26,27,28,29,30,31]
   4545 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
   4546 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
   4547 ; AVX2-FAST-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
   4548 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
   4549 ; AVX2-FAST-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
   4550 ; AVX2-FAST-NEXT:    retq
   4551 ;
   4552 ; AVX512VL-LABEL: PR24935:
   4553 ; AVX512VL:       # %bb.0:
   4554 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8]
   4555 ; AVX512VL-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
   4556 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   4557 ; AVX512VL-NEXT:    retq
   4558   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24>
   4559   ret <16 x i16> %shuffle
   4560 }
   4561 
   4562 define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
   4563 ; AVX1-LABEL: PR34369:
   4564 ; AVX1:       # %bb.0:
   4565 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   4566 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
   4567 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,0,1,0,1,6,7,10,11,4,5,4,5,6,7]
   4568 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6],xmm3[7]
   4569 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[14,15,0,1,12,13,0,1,2,3,4,5,8,9,8,9]
   4570 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   4571 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   4572 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
   4573 ; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
   4574 ; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
   4575 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
   4576 ; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
   4577 ; AVX1-NEXT:    retq
   4578 ;
   4579 ; AVX2-LABEL: PR34369:
   4580 ; AVX2:       # %bb.0:
   4581 ; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
   4582 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
   4583 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
   4584 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
   4585 ; AVX2-NEXT:    vpblendvb %ymm3, %ymm0, %ymm2, %ymm0
   4586 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   4587 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
   4588 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
   4589 ; AVX2-NEXT:    retq
   4590 ;
   4591 ; AVX512VL-LABEL: PR34369:
   4592 ; AVX512VL:       # %bb.0:
   4593 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,0,0,13,5,2,2,10,15,8,14,8,9,10,12,12]
   4594 ; AVX512VL-NEXT:    vptestnmw %ymm1, %ymm1, %k1
   4595 ; AVX512VL-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
   4596 ; AVX512VL-NEXT:    retq
   4597   %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 9, i32 10, i32 12, i32 12>
   4598   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
   4599   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
   4600   ret <16 x i16> %res
   4601 }
   4602 
   4603 define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
   4604 ; AVX1-LABEL: insert_dup_mem_v16i16_i32:
   4605 ; AVX1:       # %bb.0:
   4606 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   4607 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   4608 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   4609 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   4610 ; AVX1-NEXT:    retq
   4611 ;
   4612 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_i32:
   4613 ; AVX2OR512VL:       # %bb.0:
   4614 ; AVX2OR512VL-NEXT:    vpbroadcastw (%rdi), %ymm0
   4615 ; AVX2OR512VL-NEXT:    retq
   4616   %tmp = load i32, i32* %ptr, align 4
   4617   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   4618   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   4619   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
   4620   ret <16 x i16> %tmp3
   4621 }
   4622 
   4623 define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
   4624 ; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
   4625 ; AVX1:       # %bb.0:
   4626 ; AVX1-NEXT:    movswl (%rdi), %eax
   4627 ; AVX1-NEXT:    vmovd %eax, %xmm0
   4628 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   4629 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   4630 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   4631 ; AVX1-NEXT:    retq
   4632 ;
   4633 ; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
   4634 ; AVX2:       # %bb.0:
   4635 ; AVX2-NEXT:    movswl (%rdi), %eax
   4636 ; AVX2-NEXT:    vmovd %eax, %xmm0
   4637 ; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
   4638 ; AVX2-NEXT:    retq
   4639 ;
   4640 ; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
   4641 ; AVX512VL:       # %bb.0:
   4642 ; AVX512VL-NEXT:    movswl (%rdi), %eax
   4643 ; AVX512VL-NEXT:    vpbroadcastw %eax, %ymm0
   4644 ; AVX512VL-NEXT:    retq
   4645   %tmp = load i16, i16* %ptr, align 2
   4646   %tmp1 = sext i16 %tmp to i32
   4647   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   4648   %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
   4649   %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
   4650   ret <16 x i16> %tmp4
   4651 }
   4652 
   4653 define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
   4654 ; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
   4655 ; AVX1:       # %bb.0:
   4656 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   4657 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   4658 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   4659 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   4660 ; AVX1-NEXT:    retq
   4661 ;
   4662 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i16_i32:
   4663 ; AVX2OR512VL:       # %bb.0:
   4664 ; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %ymm0
   4665 ; AVX2OR512VL-NEXT:    retq
   4666   %tmp = load i32, i32* %ptr, align 4
   4667   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   4668   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   4669   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   4670   ret <16 x i16> %tmp3
   4671 }
   4672 
   4673 define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
   4674 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
   4675 ; AVX1:       # %bb.0:
   4676 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   4677 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
   4678 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   4679 ; AVX1-NEXT:    retq
   4680 ;
   4681 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v16i16_i32:
   4682 ; AVX2OR512VL:       # %bb.0:
   4683 ; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %ymm0
   4684 ; AVX2OR512VL-NEXT:    retq
   4685   %tmp = load i32, i32* %ptr, align 4
   4686   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
   4687   %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
   4688   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   4689   ret <16 x i16> %tmp3
   4690 }
   4691