Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VLBW
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512vbmi,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VLVBMI
     10 
     11 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
     12 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     13 ; SSE2:       # %bb.0:
     14 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
     15 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
     16 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
     17 ; SSE2-NEXT:    retq
     18 ;
     19 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     20 ; SSSE3:       # %bb.0:
     21 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
     22 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
     23 ; SSSE3-NEXT:    retq
     24 ;
     25 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     26 ; SSE41:       # %bb.0:
     27 ; SSE41-NEXT:    pxor %xmm1, %xmm1
     28 ; SSE41-NEXT:    pshufb %xmm1, %xmm0
     29 ; SSE41-NEXT:    retq
     30 ;
     31 ; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     32 ; AVX1:       # %bb.0:
     33 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     34 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
     35 ; AVX1-NEXT:    retq
     36 ;
     37 ; AVX2OR512VL-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     38 ; AVX2OR512VL:       # %bb.0:
     39 ; AVX2OR512VL-NEXT:    vpbroadcastb %xmm0, %xmm0
     40 ; AVX2OR512VL-NEXT:    retq
          ; Byte splat - every result lane is element 0 of %a; no mask index selects from %b.
     41   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     42   ret <16 x i8> %shuffle
     43 }
     44 
     45 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
     46 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     47 ; SSE2:       # %bb.0:
     48 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
     49 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
     50 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
     51 ; SSE2-NEXT:    retq
     52 ;
     53 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     54 ; SSSE3:       # %bb.0:
     55 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
     56 ; SSSE3-NEXT:    retq
     57 ;
     58 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     59 ; SSE41:       # %bb.0:
     60 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
     61 ; SSE41-NEXT:    retq
     62 ;
     63 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     64 ; AVX:       # %bb.0:
     65 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
     66 ; AVX-NEXT:    retq
          ; Result lanes 0-7 all hold element 0 of %a and lanes 8-15 all hold element 1 of %a; %b is not selected.
     67   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
     68   ret <16 x i8> %shuffle
     69 }
     70 
     71 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
     72 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
     73 ; SSE2:       # %bb.0:
     74 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     75 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
     76 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
     77 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
     78 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
     79 ; SSE2-NEXT:    retq
     80 ;
     81 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
     82 ; SSSE3:       # %bb.0:
     83 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
     84 ; SSSE3-NEXT:    retq
     85 ;
     86 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
     87 ; SSE41:       # %bb.0:
     88 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
     89 ; SSE41-NEXT:    retq
     90 ;
     91 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
     92 ; AVX:       # %bb.0:
     93 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
     94 ; AVX-NEXT:    retq
          ; Result lanes 0-7 all hold element 0 of %a and lanes 8-15 all hold element 8 of %a; %b is not selected.
     95   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
     96   ret <16 x i8> %shuffle
     97 }
     98 
     99 define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
    100 ; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
    101 ; SSE:       # %bb.0:
    102 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    103 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    104 ; SSE-NEXT:    retq
    105 ;
    106 ; AVX1-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
    107 ; AVX1:       # %bb.0:
    108 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    109 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    110 ; AVX1-NEXT:    retq
    111 ;
    112 ; AVX2-SLOW-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
    113 ; AVX2-SLOW:       # %bb.0:
    114 ; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    115 ; AVX2-SLOW-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    116 ; AVX2-SLOW-NEXT:    retq
    117 ;
    118 ; AVX2-FAST-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
    119 ; AVX2-FAST:       # %bb.0:
    120 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3]
    121 ; AVX2-FAST-NEXT:    retq
    122 ;
    123 ; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
    124 ; AVX512VL:       # %bb.0:
    125 ; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3]
    126 ; AVX512VL-NEXT:    retq
          ; Elements 0, 1, 2 and 3 of %a are each repeated across four consecutive result lanes; %b is not selected.
    127   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
    128   ret <16 x i8> %shuffle
    129 }
    130 
    131 define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
    132 ; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
    133 ; SSE:       # %bb.0:
    134 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    135 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    136 ; SSE-NEXT:    retq
    137 ;
    138 ; AVX1-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
    139 ; AVX1:       # %bb.0:
    140 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    141 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    142 ; AVX1-NEXT:    retq
    143 ;
    144 ; AVX2-SLOW-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
    145 ; AVX2-SLOW:       # %bb.0:
    146 ; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    147 ; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    148 ; AVX2-SLOW-NEXT:    retq
    149 ;
    150 ; AVX2-FAST-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
    151 ; AVX2-FAST:       # %bb.0:
    152 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7]
    153 ; AVX2-FAST-NEXT:    retq
    154 ;
    155 ; AVX512VL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
    156 ; AVX512VL:       # %bb.0:
    157 ; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7]
    158 ; AVX512VL-NEXT:    retq
          ; Elements 4, 5, 6 and 7 of %a are each repeated across four consecutive result lanes; %b is not selected.
    159   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
    160   ret <16 x i8> %shuffle
    161 }
    162 
    163 define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
    164 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    165 ; SSE2:       # %bb.0:
    166 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
    167 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
    168 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    169 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    170 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
    171 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
    172 ; SSE2-NEXT:    retq
    173 ;
    174 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    175 ; SSSE3:       # %bb.0:
    176 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
    177 ; SSSE3-NEXT:    retq
    178 ;
    179 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    180 ; SSE41:       # %bb.0:
    181 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
    182 ; SSE41-NEXT:    retq
    183 ;
    184 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    185 ; AVX:       # %bb.0:
    186 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
    187 ; AVX-NEXT:    retq
          ; Elements 0, 4, 8 and 12 of %a (the first byte of each 4-byte group) are each repeated across four consecutive result lanes.
    188   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
    189   ret <16 x i8> %shuffle
    190 }
    191 
    192 define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
    193 ; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
    194 ; SSE:       # %bb.0:
    195 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    196 ; SSE-NEXT:    retq
    197 ;
    198 ; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
    199 ; AVX:       # %bb.0:
    200 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    201 ; AVX-NEXT:    retq
          ; Each of elements 0-7 of %a is duplicated into two adjacent result lanes; %b is not selected.
    202   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
    203   ret <16 x i8> %shuffle
    204 }
    205 
    206 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
    207 ; SSE-LABEL: shuffle_v16i8_0101010101010101:
    208 ; SSE:       # %bb.0:
    209 ; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    210 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    211 ; SSE-NEXT:    retq
    212 ;
    213 ; AVX1-LABEL: shuffle_v16i8_0101010101010101:
    214 ; AVX1:       # %bb.0:
    215 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    216 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    217 ; AVX1-NEXT:    retq
    218 ;
    219 ; AVX2OR512VL-LABEL: shuffle_v16i8_0101010101010101:
    220 ; AVX2OR512VL:       # %bb.0:
    221 ; AVX2OR512VL-NEXT:    vpbroadcastw %xmm0, %xmm0
    222 ; AVX2OR512VL-NEXT:    retq
          ; The byte pair (element 0, element 1) of %a is repeated eight times, i.e. the low 16 bits of %a are replicated to every 16-bit position.
    223   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
    224   ret <16 x i8> %shuffle
    225 }
    226 
    227 define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
    228 ; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
    229 ; SSE:       # %bb.0:
    230 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    231 ; SSE-NEXT:    retq
    232 ;
    233 ; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
    234 ; AVX:       # %bb.0:
    235 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    236 ; AVX-NEXT:    retq
          ; Interleaves the low eight bytes of both inputs - mask indices 0-7 pick %a[0..7] for even lanes, 16-23 pick %b[0..7] for odd lanes.
    237   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    238   ret <16 x i8> %shuffle
    239 }
    240 
    241 define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
    242 ; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
    243 ; SSE:       # %bb.0:
    244 ; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
    245 ; SSE-NEXT:    retq
    246 ;
    247 ; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
    248 ; AVX:       # %bb.0:
    249 ; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
    250 ; AVX-NEXT:    retq
          ; Interleaves the high eight bytes of both inputs - mask indices 8-15 pick %a[8..15] for even lanes, 24-31 pick %b[8..15] for odd lanes.
    251   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    252   ret <16 x i8> %shuffle
    253 }
    254 
    255 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
    256 ; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    257 ; SSE2:       # %bb.0:
    258 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    259 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
    260 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
    261 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    262 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    263 ; SSE2-NEXT:    por %xmm2, %xmm0
    264 ; SSE2-NEXT:    retq
    265 ;
    266 ; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    267 ; SSSE3:       # %bb.0:
    268 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    269 ; SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    270 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    271 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    272 ; SSSE3-NEXT:    retq
    273 ;
    274 ; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    275 ; SSE41:       # %bb.0:
    276 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    277 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    278 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    279 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    280 ; SSE41-NEXT:    retq
    281 ;
    282 ; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    283 ; AVX1:       # %bb.0:
    284 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    285 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    286 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    287 ; AVX1-NEXT:    retq
    288 ;
    289 ; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    290 ; AVX2OR512VL:       # %bb.0:
    291 ; AVX2OR512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
    292 ; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    293 ; AVX2OR512VL-NEXT:    retq
          ; Every even result lane holds element 0 of %b (mask index 16); the odd lanes hold elements 0-7 of %a in order.
    294   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
    295   ret <16 x i8> %shuffle
    296 }
    297 
    298 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
    299 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    300 ; SSE2:       # %bb.0:
    301 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    302 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
    303 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
    304 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
    305 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
    306 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    307 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    308 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    309 ; SSE2-NEXT:    packuswb %xmm2, %xmm0
    310 ; SSE2-NEXT:    retq
    311 ;
    312 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    313 ; SSSE3:       # %bb.0:
    314 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    315 ; SSSE3-NEXT:    retq
    316 ;
    317 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    318 ; SSE41:       # %bb.0:
    319 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    320 ; SSE41-NEXT:    retq
    321 ;
    322 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    323 ; AVX:       # %bb.0:
    324 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    325 ; AVX-NEXT:    retq
          ; Reverses the byte order inside each of the four 4-byte groups of %a; %b is not selected.
    326   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
    327   ret <16 x i8> %shuffle
    328 }
    329 
    330 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
    331 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    332 ; SSE2:       # %bb.0:
    333 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    334 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    335 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    336 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
    337 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    338 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    339 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    340 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
    341 ; SSE2-NEXT:    retq
    342 ;
    343 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    344 ; SSSE3:       # %bb.0:
    345 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    346 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
    347 ; SSSE3-NEXT:    retq
    348 ;
    349 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    350 ; SSE41:       # %bb.0:
    351 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    352 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
    353 ; SSE41-NEXT:    retq
    354 ;
    355 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    356 ; AVX:       # %bb.0:
    357 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    358 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
    359 ; AVX-NEXT:    retq
          ; Result lanes 0-7 are the low eight bytes of %a and lanes 8-15 the low eight bytes of %b (mask 16-23), each with the bytes of every 4-byte group reversed.
    360   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
    361   ret <16 x i8> %shuffle
    362 }
    363 
    364 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
    365 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    366 ; SSE2:       # %bb.0:
    367 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
    368 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    369 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    370 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    371 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
    372 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
    373 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
    374 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    375 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
    376 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
    377 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm0[3,2,1,0,4,5,6,7]
    378 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,7,6,5,4]
    379 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
    380 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
    381 ; SSE2-NEXT:    retq
    382 ;
    383 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    384 ; SSSE3:       # %bb.0:
    385 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
    386 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
    387 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    388 ; SSSE3-NEXT:    retq
    389 ;
    390 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    391 ; SSE41:       # %bb.0:
    392 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
    393 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
    394 ; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    395 ; SSE41-NEXT:    retq
    396 ;
    397 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    398 ; AVX:       # %bb.0:
    399 ; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
    400 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
    401 ; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    402 ; AVX-NEXT:    retq
          ; Four byte-reversed 4-byte groups, alternating sources - %a bytes 0-3, %b bytes 12-15 (mask 31..28), %a bytes 8-11, %b bytes 4-7 (mask 23..20).
    403   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
    404   ret <16 x i8> %shuffle
    405 }
    406 
    407 define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) {
    408 ; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    409 ; SSE2:       # %bb.0:
    410 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    411 ; SSE2-NEXT:    andps %xmm2, %xmm0
    412 ; SSE2-NEXT:    andnps %xmm1, %xmm2
    413 ; SSE2-NEXT:    orps %xmm2, %xmm0
    414 ; SSE2-NEXT:    retq
    415 ;
    416 ; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    417 ; SSSE3:       # %bb.0:
    418 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
    419 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    420 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    421 ; SSSE3-NEXT:    retq
    422 ;
    423 ; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    424 ; SSE41:       # %bb.0:
    425 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    426 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    427 ; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
    428 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    429 ; SSE41-NEXT:    retq
    430 ;
    431 ; AVX1OR2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    432 ; AVX1OR2:       # %bb.0:
    433 ; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    434 ; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
    435 ; AVX1OR2-NEXT:    retq
    436 ;
    437 ; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    438 ; AVX512VL:       # %bb.0:
    439 ; AVX512VL-NEXT:    movw $-21846, %ax # imm = 0xAAAA
    440 ; AVX512VL-NEXT:    kmovd %eax, %k1
    441 ; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
    442 ; AVX512VL-NEXT:    retq
          ; Per-lane blend with no byte movement - even lanes keep the byte from %a, odd lanes take the same-position byte from %b (mask index = lane + 16).
    443   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    444   ret <16 x i8> %shuffle
    445 }
    446 
    447 define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) {
    448 ; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    449 ; SSE2:       # %bb.0:
    450 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
    451 ; SSE2-NEXT:    andps %xmm2, %xmm0
    452 ; SSE2-NEXT:    andnps %xmm1, %xmm2
    453 ; SSE2-NEXT:    orps %xmm2, %xmm0
    454 ; SSE2-NEXT:    retq
    455 ;
    456 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    457 ; SSSE3:       # %bb.0:
    458 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
    459 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
    460 ; SSSE3-NEXT:    por %xmm1, %xmm0
    461 ; SSSE3-NEXT:    retq
    462 ;
    463 ; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    464 ; SSE41:       # %bb.0:
    465 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    466 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
    467 ; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
    468 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    469 ; SSE41-NEXT:    retq
    470 ;
    471 ; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    472 ; AVX1OR2:       # %bb.0:
    473 ; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
    474 ; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
    475 ; AVX1OR2-NEXT:    retq
    476 ;
    477 ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    478 ; AVX512VL:       # %bb.0:
    479 ; AVX512VL-NEXT:    movw $-30584, %ax # imm = 0x8888
    480 ; AVX512VL-NEXT:    kmovd %eax, %k1
    481 ; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
    482 ; AVX512VL-NEXT:    retq
          ; Per-lane blend with no byte movement - lanes 3, 7, 11 and 15 take the same-position byte from %b (mask 19, 23, 27, 31); all other lanes keep %a.
    483   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
    484   ret <16 x i8> %shuffle
    485 }
    486 
    487 define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) {
    488 ; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
    489 ; SSE:       # %bb.0:
    490 ; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
    491 ; SSE-NEXT:    retq
    492 ;
    493 ; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
    494 ; AVX:       # %bb.0:
    495 ; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
    496 ; AVX-NEXT:    retq
          ; The second operand is zeroinitializer, so mask indices 19, 23, 27 and 31 place a zero byte in lanes 3, 7, 11 and 15; all other lanes keep %a.
    497   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
    498   ret <16 x i8> %shuffle
    499 }
    500 
    501 define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
    502 ; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    503 ; SSE2:       # %bb.0:
    504 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
    505 ; SSE2-NEXT:    andps %xmm2, %xmm0
    506 ; SSE2-NEXT:    andnps %xmm1, %xmm2
    507 ; SSE2-NEXT:    orps %xmm2, %xmm0
    508 ; SSE2-NEXT:    retq
    509 ;
    510 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    511 ; SSSE3:       # %bb.0:
    512 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
    513 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
    514 ; SSSE3-NEXT:    por %xmm1, %xmm0
    515 ; SSSE3-NEXT:    retq
    516 ;
    517 ; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    518 ; SSE41:       # %bb.0:
    519 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    520 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
    521 ; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
    522 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    523 ; SSE41-NEXT:    retq
    524 ;
    525 ; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    526 ; AVX1OR2:       # %bb.0:
    527 ; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
    528 ; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
    529 ; AVX1OR2-NEXT:    retq
    530 ;
    531 ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    532 ; AVX512VL:       # %bb.0:
    533 ; AVX512VL-NEXT:    movw $-28528, %ax # imm = 0x9090
    534 ; AVX512VL-NEXT:    kmovd %eax, %k1
    535 ; AVX512VL-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
    536 ; AVX512VL-NEXT:    retq
          ; Per-lane blend with no byte movement - lanes 4, 7, 12 and 15 take the same-position byte from %b (mask 20, 23, 28, 31); all other lanes keep %a.
    537   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
    538   ret <16 x i8> %shuffle
    539 }
    540 
    541 define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) {
        ; In-place byte blend with an irregular pattern: result lanes 0-3, 8, 9, 12
        ; and 14 come from the same lanes of %b (mask indices >= 16); lanes 4-7,
        ; 10, 11, 13 and 15 come from %a. In the AVX512VL assertions the k-mask
        ; 0xACF0 has exactly bits 4-7, 10, 11, 13 and 15 set, i.e. the lanes taken
        ; from %a.
    542 ; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    543 ; SSE2:       # %bb.0:
    544 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
    545 ; SSE2-NEXT:    andps %xmm2, %xmm1
    546 ; SSE2-NEXT:    andnps %xmm0, %xmm2
    547 ; SSE2-NEXT:    orps %xmm1, %xmm2
    548 ; SSE2-NEXT:    movaps %xmm2, %xmm0
    549 ; SSE2-NEXT:    retq
    550 ;
    551 ; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    552 ; SSSE3:       # %bb.0:
    553 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
    554 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
    555 ; SSSE3-NEXT:    por %xmm1, %xmm0
    556 ; SSSE3-NEXT:    retq
    557 ;
    558 ; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    559 ; SSE41:       # %bb.0:
    560 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    561 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
    562 ; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
    563 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    564 ; SSE41-NEXT:    retq
    565 ;
    566 ; AVX1OR2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    567 ; AVX1OR2:       # %bb.0:
    568 ; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
    569 ; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
    570 ; AVX1OR2-NEXT:    retq
    571 ;
    572 ; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    573 ; AVX512VL:       # %bb.0:
    574 ; AVX512VL-NEXT:    movw $-21264, %ax # imm = 0xACF0
    575 ; AVX512VL-NEXT:    kmovd %eax, %k1
    576 ; AVX512VL-NEXT:    vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
    577 ; AVX512VL-NEXT:    retq
    578   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
    579   ret <16 x i8> %shuffle
    580 }
    581 
    582 define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
        ; Gathers bytes 0, 4, 8 and 12 of %a (the first byte of each 4-byte group)
        ; into result lanes 0-3; the remaining twelve lanes are undef.
        ; NOTE(review): the name suggests this models a <4 x i32> -> <4 x i8>
        ; truncation expressed as a byte shuffle - confirm against the original
        ; commit. Without SSSE3 the assertions expect a mask followed by two
        ; packuswb steps; with SSSE3 or later a single pshufb is expected.
    583 ; SSE2-LABEL: trunc_v4i32_shuffle:
    584 ; SSE2:       # %bb.0:
    585 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    586 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    587 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    588 ; SSE2-NEXT:    retq
    589 ;
    590 ; SSSE3-LABEL: trunc_v4i32_shuffle:
    591 ; SSSE3:       # %bb.0:
    592 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
    593 ; SSSE3-NEXT:    retq
    594 ;
    595 ; SSE41-LABEL: trunc_v4i32_shuffle:
    596 ; SSE41:       # %bb.0:
    597 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
    598 ; SSE41-NEXT:    retq
    599 ;
    600 ; AVX-LABEL: trunc_v4i32_shuffle:
    601 ; AVX:       # %bb.0:
    602 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
    603 ; AVX-NEXT:    retq
    604   %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    605   ret <16 x i8> %shuffle
    606 }
    607 
    608 define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
        ; Crash/robustness test: a dependent chain of ten two-input shuffles with
        ; arbitrary masks whose final value %s.16.0 is returned. Only the function
        ; label and the presence of a retq are asserted. Arguments %s.0.1, %s.0.2
        ; and %s.0.7 are never referenced by the body.
    609 ; We don't have anything useful to check here. This generates 100s of
    610 ; instructions. Instead, just make sure we survived codegen.
    611 ; ALL-LABEL: stress_test0:
    612 ; ALL:         retq
    613 entry:
    614   %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
    615   %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
    616   %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8>
    617   %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29>
    618   %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29>
    619   %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17>
    620   %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23>
    621   %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17>
    622   %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 undef, i32 20, i32 undef, i32 3, i32 27, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
    623   %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 undef, i32 undef, i32 31, i32 1, i32 undef, i32 10>
    624   ret <16 x i8> %s.16.0
    625 }
    626 
    627 define <16 x i8> @undef_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
        ; Chain of shuffles whose masks are mostly undef. The returned %s.12.4 has
        ; a single defined mask entry (lane 12 reads lane 13 of %s.10.4), and
        ; %s.10.4 only defines its lane 1, so the lane being read is itself undef.
        ; That makes the whole result undef, which is why the assertions expect
        ; the function body to collapse to a bare retq.
    628 ; There is nothing interesting to check about these instructions other than
    629 ; that they survive codegen. However, we actually do better and delete all of
    630 ; them because the result is 'undef'.
    631 ;
    632 ; ALL-LABEL: undef_test1:
    633 ; ALL:       # %bb.0: # %entry
    634 ; ALL-NEXT:    retq
    635 entry:
    636   %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0>
    637   %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22>
    638   %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> undef, <16 x i32> <i32 3, i32 8, i32 undef, i32 7, i32 undef, i32 10, i32 8, i32 0, i32 15, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 9>
    639   %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> undef, <16 x i32> <i32 7, i32 undef, i32 14, i32 7, i32 8, i32 undef, i32 7, i32 8, i32 5, i32 15, i32 undef, i32 1, i32 11, i32 undef, i32 undef, i32 11>
    640   %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29>
    641   %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 undef>
    642   %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10>
    643   %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 10, i32 undef, i32 0, i32 5, i32 undef, i32 9, i32 undef>
    644   %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    645   %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 28, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    646   %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 5>
    647   %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    648   %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef>
    649 
    650   ret <16 x i8> %s.12.4
    651 }
    652 
    653 define <16 x i8> @PR20540(<8 x i8> %a) {
        ; Widening shuffle: the two <8 x i8> operands produce a <16 x i8> result.
        ; Result lanes 0-7 are %a[0..7]; lanes 8-15 all use mask index 8, which is
        ; lane 0 of the zeroinitializer operand, so the upper half is zero.
        ; NOTE(review): the function name presumably refers to the bug report this
        ; guards against; the even source indices (0,2,...,14) in the expected
        ; pshufb presumably reflect the <8 x i8> argument arriving as 16-bit
        ; elements - confirm against the calling convention / type legalizer.
    654 ; SSE2-LABEL: PR20540:
    655 ; SSE2:       # %bb.0:
    656 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    657 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    658 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    659 ; SSE2-NEXT:    retq
    660 ;
    661 ; SSSE3-LABEL: PR20540:
    662 ; SSSE3:       # %bb.0:
    663 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
    664 ; SSSE3-NEXT:    retq
    665 ;
    666 ; SSE41-LABEL: PR20540:
    667 ; SSE41:       # %bb.0:
    668 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
    669 ; SSE41-NEXT:    retq
    670 ;
    671 ; AVX-LABEL: PR20540:
    672 ; AVX:       # %bb.0:
    673 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
    674 ; AVX-NEXT:    retq
    675   %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    676   ret <16 x i8> %shuffle
    677 }
    678 
    679 define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
        ; Scalar-to-vector with zero fill: %i is inserted into lane 0 of %a and
        ; mask index 16 places that lane in result lane 0; lanes 1-15 read the
        ; zeroinitializer operand. The assertions expect a zero-extending scalar
        ; move (movzbl) followed by movd / vmovd.
    680 ; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    681 ; SSE:       # %bb.0:
    682 ; SSE-NEXT:    movzbl %dil, %eax
    683 ; SSE-NEXT:    movd %eax, %xmm0
    684 ; SSE-NEXT:    retq
    685 ;
    686 ; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    687 ; AVX:       # %bb.0:
    688 ; AVX-NEXT:    movzbl %dil, %eax
    689 ; AVX-NEXT:    vmovd %eax, %xmm0
    690 ; AVX-NEXT:    retq
    691   %a = insertelement <16 x i8> undef, i8 %i, i32 0
    692   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    693   ret <16 x i8> %shuffle
    694 }
    695 
    696 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
        ; %i is inserted into lane 0 of %a and mask index 16 places it in result
        ; lane 5; every other lane reads lane 0 of the zeroinitializer operand.
        ; The SSE2 and SSSE3 assertions shift the byte into the high half of a
        ; 16-bit value (shll $8) and insert it as word 2 with pinsrw; the SSE4.1
        ; and AVX assertions insert the byte directly with pinsrb $5.
    697 ; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    698 ; SSE2:       # %bb.0:
    699 ; SSE2-NEXT:    shll $8, %edi
    700 ; SSE2-NEXT:    pxor %xmm0, %xmm0
    701 ; SSE2-NEXT:    pinsrw $2, %edi, %xmm0
    702 ; SSE2-NEXT:    retq
    703 ;
    704 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    705 ; SSSE3:       # %bb.0:
    706 ; SSSE3-NEXT:    shll $8, %edi
    707 ; SSSE3-NEXT:    pxor %xmm0, %xmm0
    708 ; SSSE3-NEXT:    pinsrw $2, %edi, %xmm0
    709 ; SSSE3-NEXT:    retq
    710 ;
    711 ; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    712 ; SSE41:       # %bb.0:
    713 ; SSE41-NEXT:    pxor %xmm0, %xmm0
    714 ; SSE41-NEXT:    pinsrb $5, %edi, %xmm0
    715 ; SSE41-NEXT:    retq
    716 ;
    717 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    718 ; AVX:       # %bb.0:
    719 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    720 ; AVX-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
    721 ; AVX-NEXT:    retq
    722   %a = insertelement <16 x i8> undef, i8 %i, i32 0
    723   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    724   ret <16 x i8> %shuffle
    725 }
    726 
    727 define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
        ; %i is inserted into lane 0 of %a and mask index 16 places it in the last
        ; result lane (15). Lanes 1, 2, 4 and 5 are undef; the remaining lanes read
        ; the zeroinitializer operand. The assertions show the undef lanes being
        ; treated as zero: the vector is cleared and the byte is inserted with
        ; shll $8 + pinsrw $7 (SSE2, SSSE3) or pinsrb $15 (SSE4.1, AVX).
    728 ; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    729 ; SSE2:       # %bb.0:
    730 ; SSE2-NEXT:    shll $8, %edi
    731 ; SSE2-NEXT:    pxor %xmm0, %xmm0
    732 ; SSE2-NEXT:    pinsrw $7, %edi, %xmm0
    733 ; SSE2-NEXT:    retq
    734 ;
    735 ; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    736 ; SSSE3:       # %bb.0:
    737 ; SSSE3-NEXT:    shll $8, %edi
    738 ; SSSE3-NEXT:    pxor %xmm0, %xmm0
    739 ; SSSE3-NEXT:    pinsrw $7, %edi, %xmm0
    740 ; SSSE3-NEXT:    retq
    741 ;
    742 ; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    743 ; SSE41:       # %bb.0:
    744 ; SSE41-NEXT:    pxor %xmm0, %xmm0
    745 ; SSE41-NEXT:    pinsrb $15, %edi, %xmm0
    746 ; SSE41-NEXT:    retq
    747 ;
    748 ; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    749 ; AVX:       # %bb.0:
    750 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    751 ; AVX-NEXT:    vpinsrb $15, %edi, %xmm0, %xmm0
    752 ; AVX-NEXT:    retq
    753   %a = insertelement <16 x i8> undef, i8 %i, i32 0
    754   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
    755   ret <16 x i8> %shuffle
    756 }
    757 
    758 define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
        ; Unlike the neighbouring insert tests, %i is inserted into lane 3 of %a
        ; (not lane 0); mask index 19 (= 16 + 3) then moves it to result lane 2.
        ; All other lanes read the zeroinitializer operand. Result byte 2 is the
        ; low byte of word 1, so the SSE2 and SSSE3 assertions zero-extend %i and
        ; use pinsrw $1; the SSE4.1 and AVX assertions use pinsrb $2.
    759 ; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    760 ; SSE2:       # %bb.0:
    761 ; SSE2-NEXT:    movzbl %dil, %eax
    762 ; SSE2-NEXT:    pxor %xmm0, %xmm0
    763 ; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
    764 ; SSE2-NEXT:    retq
    765 ;
    766 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    767 ; SSSE3:       # %bb.0:
    768 ; SSSE3-NEXT:    movzbl %dil, %eax
    769 ; SSSE3-NEXT:    pxor %xmm0, %xmm0
    770 ; SSSE3-NEXT:    pinsrw $1, %eax, %xmm0
    771 ; SSSE3-NEXT:    retq
    772 ;
    773 ; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    774 ; SSE41:       # %bb.0:
    775 ; SSE41-NEXT:    pxor %xmm0, %xmm0
    776 ; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
    777 ; SSE41-NEXT:    retq
    778 ;
    779 ; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    780 ; AVX:       # %bb.0:
    781 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    782 ; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
    783 ; AVX-NEXT:    retq
    784   %a = insertelement <16 x i8> undef, i8 %i, i32 3
    785   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    786   ret <16 x i8> %shuffle
    787 }
    788 
    789 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) {
        ; Result lanes 0-11 read the zeroinitializer operand, lane 12 is %a[0],
        ; lane 14 is %a[2], and lanes 13 and 15 are undef. Because the undef lanes
        ; are free to hold %a[1] and %a[3], the assertions expect one
        ; whole-register byte shift left (pslldq / vpslldq) that moves %a[0..3]
        ; into the top four lanes.
    790 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
    791 ; SSE:       # %bb.0:
    792 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
    793 ; SSE-NEXT:    retq
    794 ;
    795 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
    796 ; AVX:       # %bb.0:
    797 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
    798 ; AVX-NEXT:    retq
    799   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef>
    800   ret <16 x i8> %shuffle
    801 }
    802 
    803 define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
        ; Result lane 0 is %a[12], lanes 2 and 3 are %a[14] and %a[15], and lane 1
        ; is undef, so it is free to hold %a[13]. Lanes 4-15 all read lane 0 of
        ; the zeroinitializer operand, matching the zz lanes in the function name.
        ; The assertions expect one whole-register byte shift right
        ; (psrldq / vpsrldq) that moves %a[12..15] into the bottom four lanes.
    804 ; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    805 ; SSE:       # %bb.0:
    806 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    807 ; SSE-NEXT:    retq
    808 ;
    809 ; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    810 ; AVX:       # %bb.0:
    811 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    812 ; AVX-NEXT:    retq
    813   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    814   ret <16 x i8> %shuffle
    815 }
    816 
    817 define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte alignment: result lane 0 is %b[15] (mask index 31) and
        ; lanes 1-15 are %a[0..14]. With SSSE3 or later the assertions expect one
        ; palignr / vpalignr; the SSE2 assertions build the same result from a
        ; right byte shift of %b, a left byte shift of %a and an OR.
    818 ; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    819 ; SSE2:       # %bb.0:
    820 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    821 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    822 ; SSE2-NEXT:    por %xmm1, %xmm0
    823 ; SSE2-NEXT:    retq
    824 ;
    825 ; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    826 ; SSSE3:       # %bb.0:
    827 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    828 ; SSSE3-NEXT:    retq
    829 ;
    830 ; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    831 ; SSE41:       # %bb.0:
    832 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    833 ; SSE41-NEXT:    retq
    834 ;
    835 ; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    836 ; AVX:       # %bb.0:
    837 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    838 ; AVX-NEXT:    retq
    839   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
    840   ret <16 x i8> %shuffle
    841 }
    842 
    843 define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
        ; Single-input byte rotation: every mask index is below 16, so %b is not
        ; read. Result lane 0 is %a[15] and lanes 1-15 are %a[0..14]. With SSSE3
        ; or later the assertions expect palignr / vpalignr with %a as both
        ; sources; the SSE2 assertions copy %a, shift the two copies in opposite
        ; directions and OR them.
    844 ; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    845 ; SSE2:       # %bb.0:
    846 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    847 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    848 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    849 ; SSE2-NEXT:    por %xmm1, %xmm0
    850 ; SSE2-NEXT:    retq
    851 ;
    852 ; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    853 ; SSSE3:       # %bb.0:
    854 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    855 ; SSSE3-NEXT:    retq
    856 ;
    857 ; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    858 ; SSE41:       # %bb.0:
    859 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    860 ; SSE41-NEXT:    retq
    861 ;
    862 ; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    863 ; AVX:       # %bb.0:
    864 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    865 ; AVX-NEXT:    retq
    866   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
    867   ret <16 x i8> %shuffle
    868 }
    869 
    870 define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte alignment: result lanes 0-14 are %b[1..15] (mask indices
        ; 17-31) and lane 15 is %a[0]. With SSSE3 or later the assertions expect
        ; one palignr / vpalignr; the SSE2 assertions use a right byte shift of
        ; %b, a left byte shift of %a and an OR.
    871 ; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    872 ; SSE2:       # %bb.0:
    873 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    874 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
    875 ; SSE2-NEXT:    por %xmm1, %xmm0
    876 ; SSE2-NEXT:    retq
    877 ;
    878 ; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    879 ; SSSE3:       # %bb.0:
    880 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
    881 ; SSSE3-NEXT:    retq
    882 ;
    883 ; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    884 ; SSE41:       # %bb.0:
    885 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
    886 ; SSE41-NEXT:    retq
    887 ;
    888 ; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    889 ; AVX:       # %bb.0:
    890 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
    891 ; AVX-NEXT:    retq
    892   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
    893   ret <16 x i8> %shuffle
    894 }
    895 
    896 define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte alignment: result lanes 0-14 are %a[1..15] and lane 15 is
        ; %b[0] (mask index 16). In the SSSE3 and SSE4.1 assertions the two-operand
        ; palignr writes xmm1, so an extra movdqa copies the result back to xmm0;
        ; the three-operand vpalignr in the AVX assertions needs no copy.
    897 ; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    898 ; SSE2:       # %bb.0:
    899 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    900 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
    901 ; SSE2-NEXT:    por %xmm1, %xmm0
    902 ; SSE2-NEXT:    retq
    903 ;
    904 ; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    905 ; SSSE3:       # %bb.0:
    906 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
    907 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    908 ; SSSE3-NEXT:    retq
    909 ;
    910 ; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    911 ; SSE41:       # %bb.0:
    912 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
    913 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    914 ; SSE41-NEXT:    retq
    915 ;
    916 ; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    917 ; AVX:       # %bb.0:
    918 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
    919 ; AVX-NEXT:    retq
    920   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
    921   ret <16 x i8> %shuffle
    922 }
    923 
    924 define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {
        ; Single-input byte rotation: every mask index is below 16, so %b is not
        ; read. Result lanes 0-14 are %a[1..15] and lane 15 is %a[0]. With SSSE3
        ; or later the assertions expect palignr / vpalignr with %a as both
        ; sources; the SSE2 assertions copy %a, shift the two copies in opposite
        ; directions and OR them.
    925 ; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    926 ; SSE2:       # %bb.0:
    927 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    928 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    929 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
    930 ; SSE2-NEXT:    por %xmm1, %xmm0
    931 ; SSE2-NEXT:    retq
    932 ;
    933 ; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    934 ; SSSE3:       # %bb.0:
    935 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
    936 ; SSSE3-NEXT:    retq
    937 ;
    938 ; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    939 ; SSE41:       # %bb.0:
    940 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
    941 ; SSE41-NEXT:    retq
    942 ;
    943 ; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    944 ; AVX:       # %bb.0:
    945 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
    946 ; AVX-NEXT:    retq
    947   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
    948   ret <16 x i8> %shuffle
    949 }
    950 
    951 define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte alignment: result lane 0 is %a[15] and lanes 1-15 are
        ; %b[0..14] (mask indices 16-30). In the SSSE3 and SSE4.1 assertions the
        ; two-operand palignr writes xmm1, so an extra movdqa copies the result
        ; back to xmm0; the three-operand vpalignr in the AVX assertions needs no
        ; copy.
    952 ; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    953 ; SSE2:       # %bb.0:
    954 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    955 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    956 ; SSE2-NEXT:    por %xmm1, %xmm0
    957 ; SSE2-NEXT:    retq
    958 ;
    959 ; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    960 ; SSSE3:       # %bb.0:
    961 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    962 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    963 ; SSSE3-NEXT:    retq
    964 ;
    965 ; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    966 ; SSE41:       # %bb.0:
    967 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    968 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    969 ; SSE41-NEXT:    retq
    970 ;
    971 ; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    972 ; AVX:       # %bb.0:
    973 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    974 ; AVX-NEXT:    retq
    975   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
    976   ret <16 x i8> %shuffle
    977 }
    978 
    979 ; PR31151
        ; Interleaves the low eight bytes of %val1 and %val2, but with the two
        ; middle 4-byte groups of the interleaved result exchanged (byte pairs
        ; 4/5 come before pairs 2/3). With SSSE3 or later the assertions expect a
        ; plain byte interleave (punpcklbw) followed by a dword permute
        ; [0,2,1,3]; the SSE2 assertions instead widen and permute each input
        ; separately and merge them with an and/andn/or mask sequence.
        ; NOTE(review): "PR31151" is presumably the bug report this test was
        ; added for - confirm against the commit history.
    980 define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23(<16 x i8> %val1, <16 x i8> %val2) {
    981 ; SSE2-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
    982 ; SSE2:       # %bb.0:
    983 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    984 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,1,3]
    985 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    986 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    987 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    988 ; SSE2-NEXT:    pand %xmm1, %xmm0
    989 ; SSE2-NEXT:    pandn %xmm2, %xmm1
    990 ; SSE2-NEXT:    por %xmm0, %xmm1
    991 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    992 ; SSE2-NEXT:    retq
    993 ;
    994 ; SSSE3-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
    995 ; SSSE3:       # %bb.0:
    996 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    997 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
    998 ; SSSE3-NEXT:    retq
    999 ;
   1000 ; SSE41-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
   1001 ; SSE41:       # %bb.0:
   1002 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1003 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
   1004 ; SSE41-NEXT:    retq
   1005 ;
   1006 ; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23:
   1007 ; AVX:       # %bb.0:
   1008 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1009 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
   1010 ; AVX-NEXT:    retq
   1011   %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23>
   1012   ret <16 x i8> %shuffle
   1013 }
   1014 
   1015 define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
        ; Only two result lanes are defined: lane 0 is %a[0] and lane 8 is %a[1];
        ; every other lane is undef, so the zeroinitializer operand is never read
        ; by a defined lane. The SSE4.1 and AVX assertions exploit the undef lanes
        ; by using a byte-to-qword zero extension (pmovzxbq / vpmovzxbq); the
        ; SSSE3 assertions use one pshufb and the SSE2 assertions a chain of
        ; unpacks plus a dword permute.
   1016 ; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
   1017 ; SSE2:       # %bb.0:
   1018 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1019 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   1020 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
   1021 ; SSE2-NEXT:    retq
   1022 ;
   1023 ; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
   1024 ; SSSE3:       # %bb.0:
   1025 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1026 ; SSSE3-NEXT:    retq
   1027 ;
   1028 ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
   1029 ; SSE41:       # %bb.0:
   1030 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1031 ; SSE41-NEXT:    retq
   1032 ;
   1033 ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
   1034 ; AVX:       # %bb.0:
   1035 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1036 ; AVX-NEXT:    retq
   1037   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1038   ret <16 x i8> %shuffle
   1039 }
   1040 
   1041 define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
        ; Places byte 0 of %a in lane 0 and byte 1 in lane 8; every other lane selects
        ; from the zeroinitializer operand (mask indices 17-23 and 25-31). Because the
        ; filler lanes must be zero, the SSE2 run zeroes xmm1 with pxor and unpacks
        ; against it three times; SSE4.1 and AVX expect a single (v)pmovzxbq.
   1042 ; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
   1043 ; SSE2:       # %bb.0:
   1044 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1045 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1046 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1047 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1048 ; SSE2-NEXT:    retq
   1049 ;
   1050 ; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
   1051 ; SSSE3:       # %bb.0:
   1052 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1053 ; SSSE3-NEXT:    retq
   1054 ;
   1055 ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
   1056 ; SSE41:       # %bb.0:
   1057 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1058 ; SSE41-NEXT:    retq
   1059 ;
   1060 ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
   1061 ; AVX:       # %bb.0:
   1062 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1063 ; AVX-NEXT:    retq
   1064   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   1065   ret <16 x i8> %shuffle
   1066 }
   1067 
   1068 define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) {
        ; Places bytes 0-3 of %a in lanes 0, 4, 8 and 12; all other lanes are undef.
        ; The SSE2 and SSSE3 runs expect punpcklbw then punpcklwd of xmm0 with itself;
        ; SSE4.1 and AVX expect a single (v)pmovzxbd.
   1069 ; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1070 ; SSE2:       # %bb.0:
   1071 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1072 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   1073 ; SSE2-NEXT:    retq
   1074 ;
   1075 ; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1076 ; SSSE3:       # %bb.0:
   1077 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1078 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   1079 ; SSSE3-NEXT:    retq
   1080 ;
   1081 ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1082 ; SSE41:       # %bb.0:
   1083 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1084 ; SSE41-NEXT:    retq
   1085 ;
   1086 ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1087 ; AVX:       # %bb.0:
   1088 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1089 ; AVX-NEXT:    retq
   1090   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
   1091   ret <16 x i8> %shuffle
   1092 }
   1093 
   1094 define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) {
        ; Places bytes 0-3 of %a in lanes 0, 4, 8 and 12; the other lanes select from
        ; the zeroinitializer operand (mask indices 17-19, 21-23, 25-27, 29-31).
        ; The SSE2 and SSSE3 runs expect pxor plus two unpacks against the zeroed
        ; register; SSE4.1 and AVX expect a single (v)pmovzxbd.
   1095 ; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1096 ; SSE2:       # %bb.0:
   1097 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1098 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1099 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1100 ; SSE2-NEXT:    retq
   1101 ;
   1102 ; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1103 ; SSSE3:       # %bb.0:
   1104 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1105 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1106 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1107 ; SSSE3-NEXT:    retq
   1108 ;
   1109 ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1110 ; SSE41:       # %bb.0:
   1111 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1112 ; SSE41-NEXT:    retq
   1113 ;
   1114 ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1115 ; AVX:       # %bb.0:
   1116 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1117 ; AVX-NEXT:    retq
   1118   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
   1119   ret <16 x i8> %shuffle
   1120 }
   1121 
   1122 define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) {
        ; Places bytes 0-7 of %a in the even lanes; the odd lanes are undef.
        ; The SSE2 and SSSE3 runs expect one punpcklbw of xmm0 with itself;
        ; SSE4.1 and AVX expect a single (v)pmovzxbw.
   1123 ; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1124 ; SSE2:       # %bb.0:
   1125 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1126 ; SSE2-NEXT:    retq
   1127 ;
   1128 ; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1129 ; SSSE3:       # %bb.0:
   1130 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1131 ; SSSE3-NEXT:    retq
   1132 ;
   1133 ; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1134 ; SSE41:       # %bb.0:
   1135 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1136 ; SSE41-NEXT:    retq
   1137 ;
   1138 ; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1139 ; AVX:       # %bb.0:
   1140 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1141 ; AVX-NEXT:    retq
   1142   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
   1143   ret <16 x i8> %shuffle
   1144 }
   1145 
   1146 define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) {
        ; Places bytes 0-7 of %a in the even lanes; the odd lanes select from the
        ; zeroinitializer operand (odd mask indices 17-31). The SSE2 and SSSE3 runs
        ; expect pxor plus one punpcklbw against the zeroed register; SSE4.1 and AVX
        ; expect a single (v)pmovzxbw.
   1147 ; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1148 ; SSE2:       # %bb.0:
   1149 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1150 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1151 ; SSE2-NEXT:    retq
   1152 ;
   1153 ; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1154 ; SSSE3:       # %bb.0:
   1155 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1156 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1157 ; SSSE3-NEXT:    retq
   1158 ;
   1159 ; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1160 ; SSE41:       # %bb.0:
   1161 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1162 ; SSE41-NEXT:    retq
   1163 ;
   1164 ; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1165 ; AVX:       # %bb.0:
   1166 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1167 ; AVX-NEXT:    retq
   1168   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
   1169   ret <16 x i8> %shuffle
   1170 }
   1171 
   1172 define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) {
        ; Irregular two-input shuffle: lane 0 is undef, lanes 4, 8 and 12 take bytes
        ; from %b (mask indices 22, 18, 18) and the remaining lanes take bytes from %a.
        ; The SSSE3 through AVX512VLBW runs expect one pshufb per input joined by an
        ; OR; the AVX512VLVBMI run expects a constant index vector fed to vpermt2b.
        ; The plain SSE2 run has no byte shuffle and expects a long unpack/pshuf/mask
        ; sequence.
   1173 ; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1174 ; SSE2:       # %bb.0: # %entry
   1175 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1176 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
   1177 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   1178 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm3[1,3,2,0,4,5,6,7]
   1179 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,0,2,1]
   1180 ; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535]
   1181 ; SSE2-NEXT:    pand %xmm5, %xmm4
   1182 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
   1183 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,3,0,1]
   1184 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,2,2,4,5,6,7]
   1185 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
   1186 ; SSE2-NEXT:    pandn %xmm2, %xmm5
   1187 ; SSE2-NEXT:    por %xmm4, %xmm5
   1188 ; SSE2-NEXT:    psrlq $16, %xmm0
   1189 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
   1190 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,1,3]
   1191 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1192 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4]
   1193 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
   1194 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
   1195 ; SSE2-NEXT:    packuswb %xmm5, %xmm2
   1196 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
   1197 ; SSE2-NEXT:    pand %xmm0, %xmm2
   1198 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7]
   1199 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
   1200 ; SSE2-NEXT:    pandn %xmm1, %xmm0
   1201 ; SSE2-NEXT:    por %xmm2, %xmm0
   1202 ; SSE2-NEXT:    retq
   1203 ;
   1204 ; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1205 ; SSSE3:       # %bb.0: # %entry
   1206 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
   1207 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
   1208 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1209 ; SSSE3-NEXT:    retq
   1210 ;
   1211 ; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1212 ; SSE41:       # %bb.0: # %entry
   1213 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
   1214 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
   1215 ; SSE41-NEXT:    por %xmm1, %xmm0
   1216 ; SSE41-NEXT:    retq
   1217 ;
   1218 ; AVX1OR2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1219 ; AVX1OR2:       # %bb.0: # %entry
   1220 ; AVX1OR2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
   1221 ; AVX1OR2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
   1222 ; AVX1OR2-NEXT:    vpor %xmm1, %xmm0, %xmm0
   1223 ; AVX1OR2-NEXT:    retq
   1224 ;
   1225 ; AVX512VLBW-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1226 ; AVX512VLBW:       # %bb.0: # %entry
   1227 ; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
   1228 ; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
   1229 ; AVX512VLBW-NEXT:    vpor %xmm1, %xmm0, %xmm0
   1230 ; AVX512VLBW-NEXT:    retq
   1231 ;
   1232 ; AVX512VLVBMI-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1233 ; AVX512VLVBMI:       # %bb.0: # %entry
   1234 ; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = <u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0>
   1235 ; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
   1236 ; AVX512VLVBMI-NEXT:    retq
   1237 entry:
   1238   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>
   1239 
   1240   ret <16 x i8> %shuffle
   1241 }
   1242 
   1243 define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<8 x i16> %a0, <8 x i16> %a1) {
        ; Logically shifts every i16 of both inputs right by 8, reinterprets the
        ; results as bytes, and gathers the even-indexed bytes of the pair (mask
        ; indices 0, 2, ..., 30). Every run expects the two word shifts followed by
        ; a single (v)packuswb.
   1244 ; SSE-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
   1245 ; SSE:       # %bb.0:
   1246 ; SSE-NEXT:    psrlw $8, %xmm0
   1247 ; SSE-NEXT:    psrlw $8, %xmm1
   1248 ; SSE-NEXT:    packuswb %xmm1, %xmm0
   1249 ; SSE-NEXT:    retq
   1250 ;
   1251 ; AVX-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30:
   1252 ; AVX:       # %bb.0:
   1253 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
   1254 ; AVX-NEXT:    vpsrlw $8, %xmm1, %xmm1
   1255 ; AVX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
   1256 ; AVX-NEXT:    retq
   1257   %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   1258   %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   1259   %3 = bitcast <8 x i16> %1 to <16 x i8>
   1260   %4 = bitcast <8 x i16> %2 to <16 x i8>
   1261   %5 = shufflevector <16 x i8> %3, <16 x i8> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1262   ret <16 x i8> %5
   1263 }
   1264 
   1265 define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
   1266 ; Nothing interesting to test here. Just make sure we didn't crash.
        ; Two irregular two-input shuffles feed a third one; only the function label
        ; and the final retq are checked, under the prefix shared by every RUN line.
   1267 ; ALL-LABEL: stress_test2:
   1268 ; ALL:         retq
   1269 entry:
   1270   %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
   1271   %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
   1272   %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 undef, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19>
   1273 
   1274   ret <16 x i8> %s.2.0
   1275 }
   1276 
   1277 define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) {
        ; The shuffle takes the four explicit zero bytes (indices 12-15) of the
        ; partly-undef constant operand and twelve bytes of %weird_zero (indices
        ; 16-27), so every selected byte is zero. The checks expect one zeroed
        ; register stored to both %ptr1 and %ptr2.
   1278 ; SSE-LABEL: constant_gets_selected:
   1279 ; SSE:       # %bb.0: # %entry
   1280 ; SSE-NEXT:    xorps %xmm0, %xmm0
   1281 ; SSE-NEXT:    movaps %xmm0, (%rdi)
   1282 ; SSE-NEXT:    movaps %xmm0, (%rsi)
   1283 ; SSE-NEXT:    retq
   1284 ;
   1285 ; AVX-LABEL: constant_gets_selected:
   1286 ; AVX:       # %bb.0: # %entry
   1287 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1288 ; AVX-NEXT:    vmovaps %xmm0, (%rdi)
   1289 ; AVX-NEXT:    vmovaps %xmm0, (%rsi)
   1290 ; AVX-NEXT:    retq
   1291 entry:
   1292   %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
   1293   %shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
   1294   %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32>
   1295   store <4 x i32> %weirder_zero, <4 x i32>* %ptr1, align 16
   1296   store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16
   1297   ret void
   1298 }
   1299 
   1300 ;
   1301 ; Shuffle to logical bit shifts
   1302 ;
   1303 
   1304 define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
        ; Even lanes select zero (mask index 16 is byte 0 of the zeroinitializer
        ; operand) and each odd lane takes the even byte just below it, so the checks
        ; expect a single 16-bit left shift by 8. %b is unused.
   1305 ; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1306 ; SSE:       # %bb.0:
   1307 ; SSE-NEXT:    psllw $8, %xmm0
   1308 ; SSE-NEXT:    retq
   1309 ;
   1310 ; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1311 ; AVX:       # %bb.0:
   1312 ; AVX-NEXT:    vpsllw $8, %xmm0, %xmm0
   1313 ; AVX-NEXT:    retq
   1314   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
   1315   ret <16 x i8> %shuffle
   1316 }
   1317 
   1318 define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
        ; In each 4-byte group the lowest byte of %a moves to the top lane and the
        ; three lower lanes select zero (mask index 16), so the checks expect a
        ; single 32-bit left shift by 24. %b is unused.
   1319 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1320 ; SSE:       # %bb.0:
   1321 ; SSE-NEXT:    pslld $24, %xmm0
   1322 ; SSE-NEXT:    retq
   1323 ;
   1324 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1325 ; AVX:       # %bb.0:
   1326 ; AVX-NEXT:    vpslld $24, %xmm0, %xmm0
   1327 ; AVX-NEXT:    retq
   1328   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
   1329   ret <16 x i8> %shuffle
   1330 }
   1331 
   1332 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
        ; In each 8-byte half the lowest byte of %a moves to the top lane and the
        ; seven lower lanes select zero (mask index 16), so the checks expect a
        ; single 64-bit left shift by 56. %b is unused.
   1333 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
   1334 ; SSE:       # %bb.0:
   1335 ; SSE-NEXT:    psllq $56, %xmm0
   1336 ; SSE-NEXT:    retq
   1337 ;
   1338 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
   1339 ; AVX:       # %bb.0:
   1340 ; AVX-NEXT:    vpsllq $56, %xmm0, %xmm0
   1341 ; AVX-NEXT:    retq
   1342   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
   1343   ret <16 x i8> %shuffle
   1344 }
   1345 
   1346 define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
        ; Each defined lane i takes byte i-1 of %a within its 8-byte half, lanes 0 and
        ; 8 select zero (mask index 16), and lanes 2, 5 and 11 are undef. The checks
        ; expect the undef lanes to be absorbed into a single 64-bit left shift by 8.
        ; %b is unused.
   1347 ; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
   1348 ; SSE:       # %bb.0:
   1349 ; SSE-NEXT:    psllq $8, %xmm0
   1350 ; SSE-NEXT:    retq
   1351 ;
   1352 ; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
   1353 ; AVX:       # %bb.0:
   1354 ; AVX-NEXT:    vpsllq $8, %xmm0, %xmm0
   1355 ; AVX-NEXT:    retq
   1356   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
   1357   ret <16 x i8> %shuffle
   1358 }
   1359 
   1360 define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
        ; The defined even lanes (0, 10, 12, 14) take the odd byte just above them,
        ; the defined odd lanes select zero (mask index 16), and the rest are undef.
        ; The checks expect a single 16-bit logical right shift by 8. %b is unused.
   1361 ; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
   1362 ; SSE:       # %bb.0:
   1363 ; SSE-NEXT:    psrlw $8, %xmm0
   1364 ; SSE-NEXT:    retq
   1365 ;
   1366 ; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
   1367 ; AVX:       # %bb.0:
   1368 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
   1369 ; AVX-NEXT:    retq
   1370   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
   1371   ret <16 x i8> %shuffle
   1372 }
   1373 
   1374 define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
        ; In each 4-byte group the defined low two lanes take the group's upper two
        ; bytes and the defined upper lanes select zero (mask index 16); lanes 6-11
        ; are undef. The checks expect a single 32-bit logical right shift by 16.
        ; %b is unused.
   1375 ; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
   1376 ; SSE:       # %bb.0:
   1377 ; SSE-NEXT:    psrld $16, %xmm0
   1378 ; SSE-NEXT:    retq
   1379 ;
   1380 ; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
   1381 ; AVX:       # %bb.0:
   1382 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
   1383 ; AVX-NEXT:    retq
   1384   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
   1385   ret <16 x i8> %shuffle
   1386 }
   1387 
   1388 define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
        ; Lane 0 takes byte 7 and lane 8 takes byte 15 of %a; the other defined lanes
        ; select zero (mask index 16) and the rest are undef. The checks expect a
        ; single 64-bit logical right shift by 56. %b is unused.
   1389 ; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
   1390 ; SSE:       # %bb.0:
   1391 ; SSE-NEXT:    psrlq $56, %xmm0
   1392 ; SSE-NEXT:    retq
   1393 ;
   1394 ; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
   1395 ; AVX:       # %bb.0:
   1396 ; AVX-NEXT:    vpsrlq $56, %xmm0, %xmm0
   1397 ; AVX-NEXT:    retq
   1398   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
   1399   ret <16 x i8> %shuffle
   1400 }
   1401 
   1402 define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
        ; Gathers the even-indexed bytes of both inputs (mask indices 0, 2, ..., 30).
        ; The plain SSE2 run expects each input to be ANDed with a constant of eight
        ; 255 values and the pair packed with packuswb; the other runs expect both
        ; inputs shuffled by one shared pshufb constant and their low qwords joined.
   1403 ; SSE2-LABEL: PR12412:
   1404 ; SSE2:       # %bb.0: # %entry
   1405 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
   1406 ; SSE2-NEXT:    pand %xmm2, %xmm1
   1407 ; SSE2-NEXT:    pand %xmm2, %xmm0
   1408 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
   1409 ; SSE2-NEXT:    retq
   1410 ;
   1411 ; SSSE3-LABEL: PR12412:
   1412 ; SSSE3:       # %bb.0: # %entry
   1413 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
   1414 ; SSSE3-NEXT:    pshufb %xmm2, %xmm1
   1415 ; SSSE3-NEXT:    pshufb %xmm2, %xmm0
   1416 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1417 ; SSSE3-NEXT:    retq
   1418 ;
   1419 ; SSE41-LABEL: PR12412:
   1420 ; SSE41:       # %bb.0: # %entry
   1421 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
   1422 ; SSE41-NEXT:    pshufb %xmm2, %xmm1
   1423 ; SSE41-NEXT:    pshufb %xmm2, %xmm0
   1424 ; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1425 ; SSE41-NEXT:    retq
   1426 ;
   1427 ; AVX-LABEL: PR12412:
   1428 ; AVX:       # %bb.0: # %entry
   1429 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
   1430 ; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   1431 ; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1432 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1433 ; AVX-NEXT:    retq
   1434 entry:
   1435   %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1436   ret <16 x i8> %0
   1437 }
   1438 
   1439 define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
        ; In each 4-byte group lane 0 is undef, lanes 1 and 2 take the group's bytes
        ; 2 and 3, and lane 3 selects zero (mask index 16). The checks expect a
        ; single 32-bit logical right shift by 8.
   1440 ; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
   1441 ; SSE:       # %bb.0:
   1442 ; SSE-NEXT:    psrld $8, %xmm0
   1443 ; SSE-NEXT:    retq
   1444 ;
   1445 ; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
   1446 ; AVX:       # %bb.0:
   1447 ; AVX-NEXT:    vpsrld $8, %xmm0, %xmm0
   1448 ; AVX-NEXT:    retq
   1449   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
   1450   ret <16 x i8> %shuffle
   1451 }
   1452 
   1453 define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
        ; Chains three shuffles at byte, 32-bit (float) and 16-bit granularity, joined
        ; by bitcasts: a reversed byte interleave of %a and %b, a reversal of the four
        ; 32-bit elements, then a swap of each adjacent i16 pair. The checks expect
        ; the whole chain to fold into a single (v)punpcklbw of the two inputs.
   1454 ; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
   1455 ; SSE:       # %bb.0:
   1456 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1457 ; SSE-NEXT:    retq
   1458 ;
   1459 ; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
   1460 ; AVX:       # %bb.0:
   1461 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1462 ; AVX-NEXT:    retq
   1463   %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
   1464   %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
   1465   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   1466   %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
   1467   %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   1468   %bitcast8  = bitcast <8 x i16> %shuffle16 to <16 x i8>
   1469   ret <16 x i8> %bitcast8
   1470 }
   1471 
   1472 define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
        ; Loads an i32, inserts it into element 0 of a zero <4 x i32>, reinterprets
        ; that as bytes and splats byte 0 to all sixteen lanes. The AVX2 and AVX-512
        ; runs expect the load to fold into vpbroadcastb from (%rdi); the older
        ; targets expect a movd load followed by an in-register splat.
   1473 ; SSE2-LABEL: insert_dup_mem_v16i8_i32:
   1474 ; SSE2:       # %bb.0:
   1475 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1476 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1477 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   1478 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1479 ; SSE2-NEXT:    retq
   1480 ;
   1481 ; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
   1482 ; SSSE3:       # %bb.0:
   1483 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1484 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1485 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
   1486 ; SSSE3-NEXT:    retq
   1487 ;
   1488 ; SSE41-LABEL: insert_dup_mem_v16i8_i32:
   1489 ; SSE41:       # %bb.0:
   1490 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1491 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   1492 ; SSE41-NEXT:    pshufb %xmm1, %xmm0
   1493 ; SSE41-NEXT:    retq
   1494 ;
   1495 ; AVX1-LABEL: insert_dup_mem_v16i8_i32:
   1496 ; AVX1:       # %bb.0:
   1497 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1498 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1499 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   1500 ; AVX1-NEXT:    retq
   1501 ;
   1502 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_i32:
   1503 ; AVX2OR512VL:       # %bb.0:
   1504 ; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
   1505 ; AVX2OR512VL-NEXT:    retq
   1506   %tmp = load i32, i32* %ptr, align 4
   1507   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   1508   %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
   1509   %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
   1510   ret <16 x i8> %tmp3
   1511 }
   1512 
   1513 define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
        ; Loads an i8, sign-extends it to i32, inserts it into element 0 of a zero
        ; <4 x i32>, reinterprets that as bytes and splats byte 0. The AVX2 and
        ; AVX-512 runs expect the extension to disappear into vpbroadcastb from
        ; (%rdi); the older targets expect movsbl, a (v)movd and an in-register splat.
   1514 ; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
   1515 ; SSE2:       # %bb.0:
   1516 ; SSE2-NEXT:    movsbl (%rdi), %eax
   1517 ; SSE2-NEXT:    movd %eax, %xmm0
   1518 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1519 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   1520 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1521 ; SSE2-NEXT:    retq
   1522 ;
   1523 ; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
   1524 ; SSSE3:       # %bb.0:
   1525 ; SSSE3-NEXT:    movsbl (%rdi), %eax
   1526 ; SSSE3-NEXT:    movd %eax, %xmm0
   1527 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1528 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
   1529 ; SSSE3-NEXT:    retq
   1530 ;
   1531 ; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
   1532 ; SSE41:       # %bb.0:
   1533 ; SSE41-NEXT:    movsbl (%rdi), %eax
   1534 ; SSE41-NEXT:    movd %eax, %xmm0
   1535 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   1536 ; SSE41-NEXT:    pshufb %xmm1, %xmm0
   1537 ; SSE41-NEXT:    retq
   1538 ;
   1539 ; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
   1540 ; AVX1:       # %bb.0:
   1541 ; AVX1-NEXT:    movsbl (%rdi), %eax
   1542 ; AVX1-NEXT:    vmovd %eax, %xmm0
   1543 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1544 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   1545 ; AVX1-NEXT:    retq
   1546 ;
   1547 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_sext_i8:
   1548 ; AVX2OR512VL:       # %bb.0:
   1549 ; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
   1550 ; AVX2OR512VL-NEXT:    retq
   1551   %tmp = load i8, i8* %ptr, align 1
   1552   %tmp1 = sext i8 %tmp to i32
   1553   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   1554   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
   1555   %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer
   1556   ret <16 x i8> %tmp4
   1557 }
   1558 
   1559 define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
        ; Loads an i32, inserts it into element 0 of a zero <4 x i32>, reinterprets
        ; that as bytes and splats byte 1 (every mask index is 1). The AVX2 and
        ; AVX-512 runs expect vpbroadcastb straight from byte offset 1 of the memory
        ; operand; the older targets expect a movd load followed by a splat of byte 1.
   1560 ; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1561 ; SSE2:       # %bb.0:
   1562 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1563 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1564 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   1565 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1566 ; SSE2-NEXT:    retq
   1567 ;
   1568 ; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1569 ; SSSE3:       # %bb.0:
   1570 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1571 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1572 ; SSSE3-NEXT:    retq
   1573 ;
   1574 ; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1575 ; SSE41:       # %bb.0:
   1576 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1577 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1578 ; SSE41-NEXT:    retq
   1579 ;
   1580 ; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1581 ; AVX1:       # %bb.0:
   1582 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1583 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1584 ; AVX1-NEXT:    retq
   1585 ;
   1586 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1587 ; AVX2OR512VL:       # %bb.0:
   1588 ; AVX2OR512VL-NEXT:    vpbroadcastb 1(%rdi), %xmm0
   1589 ; AVX2OR512VL-NEXT:    retq
   1590   %tmp = load i32, i32* %ptr, align 4
   1591   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   1592   %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
   1593   %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   1594   ret <16 x i8> %tmp3
   1595 }
   1596 
   1597 define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
        ; Splat byte 2 of an i32 loaded from %ptr across all 16 result bytes.
        ; The assertions below expect the AVX2 and AVX512VL configurations to
        ; fold the load into one vpbroadcastb from offset 2 of the pointer,
        ; while the older feature levels load with movd and then shuffle in a
        ; register.
   1598 ; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1599 ; SSE2:       # %bb.0:
   1600 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1601 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1602 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7]
   1603 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1604 ; SSE2-NEXT:    retq
   1605 ;
   1606 ; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1607 ; SSSE3:       # %bb.0:
   1608 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1609 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1610 ; SSSE3-NEXT:    retq
   1611 ;
   1612 ; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1613 ; SSE41:       # %bb.0:
   1614 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1615 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1616 ; SSE41-NEXT:    retq
   1617 ;
   1618 ; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1619 ; AVX1:       # %bb.0:
   1620 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1621 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1622 ; AVX1-NEXT:    retq
   1623 ;
   1624 ; AVX2OR512VL-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1625 ; AVX2OR512VL:       # %bb.0:
   1626 ; AVX2OR512VL-NEXT:    vpbroadcastb 2(%rdi), %xmm0
   1627 ; AVX2OR512VL-NEXT:    retq
        ; The loaded i32 is placed in lane 0 of an otherwise zero <4 x i32>;
        ; after the bitcast to <16 x i8>, the all-twos shuffle mask replicates
        ; byte index 2 of that vector into every result element.
   1628   %tmp = load i32, i32* %ptr, align 4
   1629   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   1630   %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
   1631   %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
   1632   ret <16 x i8> %tmp3
   1633 }
   1634 
   1635 define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
        ; Splat byte 1 of an i8 load that has been sign-extended to i32.
        ; Byte 1 of the widened value is produced by the sign extension, not
        ; read from memory, so every configuration below is expected to start
        ; with a scalar movsbl. The AVX2 and AVX512VL configurations then
        ; shift right by 8 to bring that byte to position 0 before
        ; broadcasting; the others shuffle byte 1 directly in a register.
   1636 ; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1637 ; SSE2:       # %bb.0:
   1638 ; SSE2-NEXT:    movsbl (%rdi), %eax
   1639 ; SSE2-NEXT:    movd %eax, %xmm0
   1640 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1641 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
   1642 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1643 ; SSE2-NEXT:    retq
   1644 ;
   1645 ; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1646 ; SSSE3:       # %bb.0:
   1647 ; SSSE3-NEXT:    movsbl (%rdi), %eax
   1648 ; SSSE3-NEXT:    movd %eax, %xmm0
   1649 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1650 ; SSSE3-NEXT:    retq
   1651 ;
   1652 ; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1653 ; SSE41:       # %bb.0:
   1654 ; SSE41-NEXT:    movsbl (%rdi), %eax
   1655 ; SSE41-NEXT:    movd %eax, %xmm0
   1656 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1657 ; SSE41-NEXT:    retq
   1658 ;
   1659 ; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1660 ; AVX1:       # %bb.0:
   1661 ; AVX1-NEXT:    movsbl (%rdi), %eax
   1662 ; AVX1-NEXT:    vmovd %eax, %xmm0
   1663 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1664 ; AVX1-NEXT:    retq
   1665 ;
   1666 ; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1667 ; AVX2:       # %bb.0:
   1668 ; AVX2-NEXT:    movsbl (%rdi), %eax
   1669 ; AVX2-NEXT:    shrl $8, %eax
   1670 ; AVX2-NEXT:    vmovd %eax, %xmm0
   1671 ; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
   1672 ; AVX2-NEXT:    retq
   1673 ;
   1674 ; AVX512VL-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1675 ; AVX512VL:       # %bb.0:
   1676 ; AVX512VL-NEXT:    movsbl (%rdi), %eax
   1677 ; AVX512VL-NEXT:    shrl $8, %eax
   1678 ; AVX512VL-NEXT:    vpbroadcastb %eax, %xmm0
   1679 ; AVX512VL-NEXT:    retq
        ; The sign-extended value is placed in lane 0 of an otherwise zero
        ; <4 x i32>; after the bitcast to <16 x i8>, the all-ones shuffle mask
        ; replicates byte index 1 of that vector into every result element.
   1680   %tmp = load i8, i8* %ptr, align 1
   1681   %tmp1 = sext i8 %tmp to i32
   1682   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   1683   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
   1684   %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   1685   ret <16 x i8> %tmp4
   1686 }
   1687 
   1688 define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
        ; Splat byte 2 of an i8 load that has been sign-extended to i32.
        ; Byte 2 of the widened value is produced by the sign extension, not
        ; read from memory, so every configuration below is expected to start
        ; with a scalar movsbl. The AVX2 and AVX512VL configurations then
        ; shift right by 16 to bring that byte to position 0 before
        ; broadcasting; the others shuffle byte 2 directly in a register.
   1689 ; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1690 ; SSE2:       # %bb.0:
   1691 ; SSE2-NEXT:    movsbl (%rdi), %eax
   1692 ; SSE2-NEXT:    movd %eax, %xmm0
   1693 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1694 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7]
   1695 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1696 ; SSE2-NEXT:    retq
   1697 ;
   1698 ; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1699 ; SSSE3:       # %bb.0:
   1700 ; SSSE3-NEXT:    movsbl (%rdi), %eax
   1701 ; SSSE3-NEXT:    movd %eax, %xmm0
   1702 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1703 ; SSSE3-NEXT:    retq
   1704 ;
   1705 ; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1706 ; SSE41:       # %bb.0:
   1707 ; SSE41-NEXT:    movsbl (%rdi), %eax
   1708 ; SSE41-NEXT:    movd %eax, %xmm0
   1709 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1710 ; SSE41-NEXT:    retq
   1711 ;
   1712 ; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1713 ; AVX1:       # %bb.0:
   1714 ; AVX1-NEXT:    movsbl (%rdi), %eax
   1715 ; AVX1-NEXT:    vmovd %eax, %xmm0
   1716 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1717 ; AVX1-NEXT:    retq
   1718 ;
   1719 ; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1720 ; AVX2:       # %bb.0:
   1721 ; AVX2-NEXT:    movsbl (%rdi), %eax
   1722 ; AVX2-NEXT:    shrl $16, %eax
   1723 ; AVX2-NEXT:    vmovd %eax, %xmm0
   1724 ; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
   1725 ; AVX2-NEXT:    retq
   1726 ;
   1727 ; AVX512VL-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1728 ; AVX512VL:       # %bb.0:
   1729 ; AVX512VL-NEXT:    movsbl (%rdi), %eax
   1730 ; AVX512VL-NEXT:    shrl $16, %eax
   1731 ; AVX512VL-NEXT:    vpbroadcastb %eax, %xmm0
   1732 ; AVX512VL-NEXT:    retq
        ; The sign-extended value is placed in lane 0 of an otherwise zero
        ; <4 x i32>; after the bitcast to <16 x i8>, the all-twos shuffle mask
        ; replicates byte index 2 of that vector into every result element.
   1733   %tmp = load i8, i8* %ptr, align 1
   1734   %tmp1 = sext i8 %tmp to i32
   1735   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   1736   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
   1737   %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
   1738   ret <16 x i8> %tmp4
   1739 }
   1740 
   1741 define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b) {
        ; Shuffle of a vector built from two byte loads plus one constant zero
        ; element. The name suggests a regression test for bug report 31364;
        ; that link is not shown in this file - TODO confirm.
        ; The assertions below expect result byte 7 (which reads the constant
        ; zero element) to be emitted as a zeroed lane in the pshufb mask, and
        ; expect the SSE4.1 and AVX configurations to build the source vector
        ; with pinsrb straight from memory.
   1742 ; SSE2-LABEL: PR31364:
   1743 ; SSE2:       # %bb.0:
   1744 ; SSE2-NEXT:    movzbl (%rdi), %eax
   1745 ; SSE2-NEXT:    movzbl (%rsi), %ecx
   1746 ; SSE2-NEXT:    shll $8, %ecx
   1747 ; SSE2-NEXT:    orl %eax, %ecx
   1748 ; SSE2-NEXT:    movzwl %cx, %eax
   1749 ; SSE2-NEXT:    movd %eax, %xmm1
   1750 ; SSE2-NEXT:    pxor %xmm0, %xmm0
   1751 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   1752 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[1,1,1,3,4,5,6,7]
   1753 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
   1754 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
   1755 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
   1756 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4]
   1757 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
   1758 ; SSE2-NEXT:    retq
   1759 ;
   1760 ; SSSE3-LABEL: PR31364:
   1761 ; SSSE3:       # %bb.0:
   1762 ; SSSE3-NEXT:    movzbl (%rdi), %eax
   1763 ; SSSE3-NEXT:    movzbl (%rsi), %ecx
   1764 ; SSSE3-NEXT:    shll $8, %ecx
   1765 ; SSSE3-NEXT:    orl %eax, %ecx
   1766 ; SSSE3-NEXT:    movzwl %cx, %eax
   1767 ; SSSE3-NEXT:    movd %eax, %xmm0
   1768 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
   1769 ; SSSE3-NEXT:    retq
   1770 ;
   1771 ; SSE41-LABEL: PR31364:
   1772 ; SSE41:       # %bb.0:
   1773 ; SSE41-NEXT:    pxor %xmm0, %xmm0
   1774 ; SSE41-NEXT:    pinsrb $0, (%rdi), %xmm0
   1775 ; SSE41-NEXT:    pinsrb $1, (%rsi), %xmm0
   1776 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
   1777 ; SSE41-NEXT:    retq
   1778 ;
   1779 ; AVX-LABEL: PR31364:
   1780 ; AVX:       # %bb.0:
   1781 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
   1782 ; AVX-NEXT:    vpinsrb $0, (%rdi), %xmm0, %xmm0
   1783 ; AVX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm0
   1784 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
   1785 ; AVX-NEXT:    retq
        ; The base vector is undef everywhere except element 3, which is the
        ; constant 0. The byte from %a goes into element 0 and the byte from
        ; %b into element 1. The final mask reads element 1 for most lanes,
        ; element 3 for lane 7, and element 0 for the last three lanes.
   1786   %v0 = load i8, i8* %a, align 1
   1787   %vecins = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %v0, i32 0
   1788   %v1 = load i8, i8* %b, align 1
   1789   %vecins2 = insertelement <16 x i8> %vecins, i8 %v1, i32 1
   1790   %result = shufflevector <16 x i8> %vecins2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 3, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0>
   1791   ret <16 x i8> %result
   1792 }
   1793 
   1794 define <16 x i8> @PR31301(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
        ; Interleave two byte splats, each built from a single i8 load. The
        ; name suggests a regression test for bug report 31301; that link is
        ; not shown in this file - TODO confirm.
        ; The assertions below expect the AVX2 and AVX512VL configurations to
        ; broadcast each byte directly from memory and then unpack, while the
        ; older feature levels load with movzbl and movd and splat in a
        ; register before the unpack.
   1795 ; SSE2-LABEL: PR31301:
   1796 ; SSE2:       # %bb.0: # %entry
   1797 ; SSE2-NEXT:    movzbl (%rdi), %eax
   1798 ; SSE2-NEXT:    movd %eax, %xmm0
   1799 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1800 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
   1801 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1802 ; SSE2-NEXT:    movzbl (%rsi), %eax
   1803 ; SSE2-NEXT:    movd %eax, %xmm1
   1804 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1805 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
   1806 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
   1807 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1808 ; SSE2-NEXT:    retq
   1809 ;
   1810 ; SSSE3-LABEL: PR31301:
   1811 ; SSSE3:       # %bb.0: # %entry
   1812 ; SSSE3-NEXT:    movzbl (%rdi), %eax
   1813 ; SSSE3-NEXT:    movd %eax, %xmm0
   1814 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1815 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
   1816 ; SSSE3-NEXT:    movzbl (%rsi), %eax
   1817 ; SSSE3-NEXT:    movd %eax, %xmm2
   1818 ; SSSE3-NEXT:    pshufb %xmm1, %xmm2
   1819 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   1820 ; SSSE3-NEXT:    retq
   1821 ;
   1822 ; SSE41-LABEL: PR31301:
   1823 ; SSE41:       # %bb.0: # %entry
   1824 ; SSE41-NEXT:    movzbl (%rdi), %eax
   1825 ; SSE41-NEXT:    movd %eax, %xmm0
   1826 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   1827 ; SSE41-NEXT:    pshufb %xmm1, %xmm0
   1828 ; SSE41-NEXT:    movzbl (%rsi), %eax
   1829 ; SSE41-NEXT:    movd %eax, %xmm2
   1830 ; SSE41-NEXT:    pshufb %xmm1, %xmm2
   1831 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   1832 ; SSE41-NEXT:    retq
   1833 ;
   1834 ; AVX1-LABEL: PR31301:
   1835 ; AVX1:       # %bb.0: # %entry
   1836 ; AVX1-NEXT:    movzbl (%rdi), %eax
   1837 ; AVX1-NEXT:    vmovd %eax, %xmm0
   1838 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1839 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   1840 ; AVX1-NEXT:    movzbl (%rsi), %eax
   1841 ; AVX1-NEXT:    vmovd %eax, %xmm2
   1842 ; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
   1843 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1844 ; AVX1-NEXT:    retq
   1845 ;
   1846 ; AVX2OR512VL-LABEL: PR31301:
   1847 ; AVX2OR512VL:       # %bb.0: # %entry
   1848 ; AVX2OR512VL-NEXT:    vpbroadcastb (%rdi), %xmm0
   1849 ; AVX2OR512VL-NEXT:    vpbroadcastb (%rsi), %xmm1
   1850 ; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1851 ; AVX2OR512VL-NEXT:    retq
        ; The block label below is echoed in the "# %entry" text of the
        ; assertions above, so the two must stay in sync.
   1852 entry:
        ; Each loaded byte is inserted at element 0 and splatted to the low
        ; eight lanes (upper eight lanes undef). The last shuffle alternates
        ; lanes 0-7 of the first splat with lanes 0-7 of the second.
   1853   %0 = load i8, i8* %x, align 1
   1854   %1 = insertelement <16 x i8> undef, i8 %0, i32 0
   1855   %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1856   %2 = load i8, i8* %y, align 1
   1857   %3 = insertelement <16 x i8> undef, i8 %2, i32 0
   1858   %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1859   %vzip.i = shufflevector <16 x i8> %lane, <16 x i8> %lane3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1860   ret <16 x i8> %vzip.i
   1861 }
   1862