Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      7 
      8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      9 target triple = "x86_64-unknown-unknown"
     10 
     11 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
        ; The mask is index 0 in all sixteen lanes, so every result byte is byte 0
        ; of %a (a byte splat). %b is never selected by the mask.
     12 ; FIXME: SSE2 should look like the following:
     13 ; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
     14 ; FIXME:       # BB#0:
     15 ; FIXME-NEXT:    punpcklbw %xmm0, %xmm0
     16 ; FIXME-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
     17 ; FIXME-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
     18 ; FIXME-NEXT:    retq
     19 ;
     20 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     21 ; SSE2:       # BB#0:
     22 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
     23 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
     24 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     25 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
     26 ; SSE2-NEXT:    retq
     27 ;
     28 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     29 ; SSSE3:       # BB#0:
     30 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
     31 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
     32 ; SSSE3-NEXT:    retq
     33 ;
     34 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     35 ; SSE41:       # BB#0:
     36 ; SSE41-NEXT:    pxor %xmm1, %xmm1
     37 ; SSE41-NEXT:    pshufb %xmm1, %xmm0
     38 ; SSE41-NEXT:    retq
     39 ;
     40 ; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     41 ; AVX1:       # BB#0:
     42 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     43 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
     44 ; AVX1-NEXT:    retq
     45 ;
     46 ; AVX2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
     47 ; AVX2:       # BB#0:
     48 ; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
     49 ; AVX2-NEXT:    retq
     50   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     51   ret <16 x i8> %shuffle
     52 }
     53 
     54 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
        ; Lanes 0-7 all take byte 0 of %a and lanes 8-15 all take byte 1 of %a.
        ; %b is never selected by the mask.
     55 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     56 ; SSE2:       # BB#0:
     57 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
     58 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
     59 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     60 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
     61 ; SSE2-NEXT:    retq
     62 ;
     63 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     64 ; SSSE3:       # BB#0:
     65 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
     66 ; SSSE3-NEXT:    retq
     67 ;
     68 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     69 ; SSE41:       # BB#0:
     70 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
     71 ; SSE41-NEXT:    retq
     72 ;
     73 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
     74 ; AVX:       # BB#0:
     75 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
     76 ; AVX-NEXT:    retq
     77   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
     78   ret <16 x i8> %shuffle
     79 }
     80 
     81 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
        ; Lanes 0-7 all take byte 0 of %a and lanes 8-15 all take byte 8 of %a,
        ; i.e. the first byte of each 8-byte half is repeated across that half.
        ; %b is never selected by the mask.
     82 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
     83 ; SSE2:       # BB#0:
     84 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     85 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,2,4,5,6,7]
     86 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
     87 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
     88 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
     89 ; SSE2-NEXT:    retq
     90 ;
     91 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
     92 ; SSSE3:       # BB#0:
     93 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
     94 ; SSSE3-NEXT:    retq
     95 ;
     96 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
     97 ; SSE41:       # BB#0:
     98 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
     99 ; SSE41-NEXT:    retq
    100 ;
    101 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
    102 ; AVX:       # BB#0:
    103 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
    104 ; AVX-NEXT:    retq
    105   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    106   ret <16 x i8> %shuffle
    107 }
    108 
    109 define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
        ; Bytes 0, 1, 2 and 3 of %a are each repeated four times, in that order.
        ; %b is never selected by the mask.
    110 ; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
    111 ; SSE:       # BB#0:
    112 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    113 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    114 ; SSE-NEXT:    retq
    115 ;
    116 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
    117 ; AVX:       # BB#0:
    118 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    119 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    120 ; AVX-NEXT:    retq
    121   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
    122   ret <16 x i8> %shuffle
    123 }
    124 
    125 define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
        ; Bytes 4, 5, 6 and 7 of %a are each repeated four times, in that order.
        ; %b is never selected by the mask.
    126 ; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
    127 ; SSE:       # BB#0:
    128 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    129 ; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    130 ; SSE-NEXT:    retq
    131 ;
    132 ; AVX-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
    133 ; AVX:       # BB#0:
    134 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    135 ; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    136 ; AVX-NEXT:    retq
    137   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
    138   ret <16 x i8> %shuffle
    139 }
    140 
    141 define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
        ; Bytes 0, 4, 8 and 12 of %a are each repeated four times, i.e. the first
        ; byte of every 4-byte group is repeated across that group.
        ; %b is never selected by the mask.
    142 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    143 ; SSE2:       # BB#0:
    144 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
    145 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
    146 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    147 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    148 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
    149 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
    150 ; SSE2-NEXT:    retq
    151 ;
    152 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    153 ; SSSE3:       # BB#0:
    154 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
    155 ; SSSE3-NEXT:    retq
    156 ;
    157 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    158 ; SSE41:       # BB#0:
    159 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
    160 ; SSE41-NEXT:    retq
    161 ;
    162 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
    163 ; AVX:       # BB#0:
    164 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
    165 ; AVX-NEXT:    retq
    166   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
    167   ret <16 x i8> %shuffle
    168 }
    169 
    170 define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
        ; Each of bytes 0-7 of %a appears twice in a row, i.e. the low half of %a
        ; is interleaved with itself. %b is never selected by the mask.
    171 ; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
    172 ; SSE:       # BB#0:
    173 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    174 ; SSE-NEXT:    retq
    175 ;
    176 ; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
    177 ; AVX:       # BB#0:
    178 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    179 ; AVX-NEXT:    retq
    180   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
    181   ret <16 x i8> %shuffle
    182 }
    183 
    184 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
        ; The byte pair (0, 1) of %a is repeated eight times, i.e. the low 16 bits
        ; of %a are splatted. %b is never selected by the mask.
        ; Note that this name spells the mask as single digits without '_'
        ; separators, unlike the two-digit "NN_NN_..." names of the neighbouring tests.
    185 ; FIXME: SSE2 should be the following:
    186 ; FIXME-LABEL: @shuffle_v16i8_0101010101010101
    187 ; FIXME:       # BB#0:
    188 ; FIXME-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
    189 ; FIXME-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
    190 ; FIXME-NEXT:    retq
    191 ;
    192 ; SSE2-LABEL: shuffle_v16i8_0101010101010101:
    193 ; SSE2:       # BB#0:
    194 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
    195 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
    196 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
    197 ; SSE2-NEXT:    retq
    198 ;
    199 ; SSSE3-LABEL: shuffle_v16i8_0101010101010101:
    200 ; SSSE3:       # BB#0:
    201 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    202 ; SSSE3-NEXT:    retq
    203 ;
    204 ; SSE41-LABEL: shuffle_v16i8_0101010101010101:
    205 ; SSE41:       # BB#0:
    206 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    207 ; SSE41-NEXT:    retq
    208 ;
    209 ; AVX1-LABEL: shuffle_v16i8_0101010101010101:
    210 ; AVX1:       # BB#0:
    211 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
    212 ; AVX1-NEXT:    retq
    213 ;
    214 ; AVX2-LABEL: shuffle_v16i8_0101010101010101:
    215 ; AVX2:       # BB#0:
    216 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
    217 ; AVX2-NEXT:    retq
    218   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
    219   ret <16 x i8> %shuffle
    220 }
    221 
    222 define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
        ; Interleaves the low eight bytes of %a (indices 0-7) with the low eight
        ; bytes of %b (indices 16-23), with the %a byte first in each pair.
    223 ; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
    224 ; SSE:       # BB#0:
    225 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    226 ; SSE-NEXT:    retq
    227 ;
    228 ; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
    229 ; AVX:       # BB#0:
    230 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    231 ; AVX-NEXT:    retq
    232   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    233   ret <16 x i8> %shuffle
    234 }
    235 
    236 define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
        ; Interleaves the high eight bytes of %a (indices 8-15) with the high eight
        ; bytes of %b (indices 24-31), with the %a byte first in each pair.
    237 ; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
    238 ; SSE:       # BB#0:
    239 ; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
    240 ; SSE-NEXT:    retq
    241 ;
    242 ; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
    243 ; AVX:       # BB#0:
    244 ; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
    245 ; AVX-NEXT:    retq
    246   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    247   ret <16 x i8> %shuffle
    248 }
    249 
    250 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
        ; Every even lane takes byte 0 of %b (index 16); the odd lanes take
        ; bytes 0-7 of %a in order. In other words, a splat of %b's first byte is
        ; interleaved with the low half of %a, with the %b byte first in each pair.
    251 ; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    252 ; SSE2:       # BB#0:
    253 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    254 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    255 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
    256 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    257 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
    258 ; SSE2-NEXT:    pand %xmm2, %xmm1
    259 ; SSE2-NEXT:    pandn %xmm0, %xmm2
    260 ; SSE2-NEXT:    por %xmm1, %xmm2
    261 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
    262 ; SSE2-NEXT:    retq
    263 ;
    264 ; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    265 ; SSSE3:       # BB#0:
    266 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    267 ; SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    268 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    269 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    270 ; SSSE3-NEXT:    retq
    271 ;
    272 ; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    273 ; SSE41:       # BB#0:
    274 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    275 ; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    276 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    277 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    278 ; SSE41-NEXT:    retq
    279 ;
    280 ; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    281 ; AVX1:       # BB#0:
    282 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    283 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
    284 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    285 ; AVX1-NEXT:    retq
    286 ;
    287 ; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
    288 ; AVX2:       # BB#0:
    289 ; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
    290 ; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    291 ; AVX2-NEXT:    retq
    292   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
    293   ret <16 x i8> %shuffle
    294 }
    295 
    296 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
        ; Reverses the byte order inside each of the four 4-byte groups of %a
        ; (3,2,1,0 / 7,6,5,4 / 11,10,9,8 / 15,14,13,12). %b is never selected.
    297 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    298 ; SSE2:       # BB#0:
    299 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    300 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
    301 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
    302 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
    303 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
    304 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    305 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    306 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    307 ; SSE2-NEXT:    packuswb %xmm2, %xmm0
    308 ; SSE2-NEXT:    retq
    309 ;
    310 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    311 ; SSSE3:       # BB#0:
    312 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    313 ; SSSE3-NEXT:    retq
    314 ;
    315 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    316 ; SSE41:       # BB#0:
    317 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    318 ; SSE41-NEXT:    retq
    319 ;
    320 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
    321 ; AVX:       # BB#0:
    322 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    323 ; AVX-NEXT:    retq
    324   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
    325   ret <16 x i8> %shuffle
    326 }
    327 
    328 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
        ; Lanes 0-7 are bytes 0-7 of %a and lanes 8-15 are bytes 0-7 of %b
        ; (indices 16-23); in both halves the bytes are reversed within each
        ; 4-byte group.
    329 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    330 ; SSE2:       # BB#0:
    331 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    332 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    333 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    334 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
    335 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    336 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    337 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    338 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
    339 ; SSE2-NEXT:    retq
    340 ;
    341 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    342 ; SSSE3:       # BB#0:
    343 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    344 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
    345 ; SSSE3-NEXT:    retq
    346 ;
    347 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    348 ; SSE41:       # BB#0:
    349 ; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    350 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
    351 ; SSE41-NEXT:    retq
    352 ;
    353 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
    354 ; AVX:       # BB#0:
    355 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    356 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
    357 ; AVX-NEXT:    retq
    358   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
    359   ret <16 x i8> %shuffle
    360 }
    361 
    362 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
        ; The result is four byte-reversed 4-byte groups taken alternately from the
        ; two inputs: %a bytes 3..0, %b bytes 15..12 (indices 31..28),
        ; %a bytes 11..8, then %b bytes 7..4 (indices 23..20).
    363 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    364 ; SSE2:       # BB#0:
    365 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
    366 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    367 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    368 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    369 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
    370 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
    371 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
    372 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    373 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
    374 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    375 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
    376 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    377 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
    378 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    379 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    380 ; SSE2-NEXT:    packuswb %xmm3, %xmm0
    381 ; SSE2-NEXT:    retq
    382 ;
    383 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    384 ; SSSE3:       # BB#0:
    385 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
    386 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
    387 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    388 ; SSSE3-NEXT:    retq
    389 ;
    390 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    391 ; SSE41:       # BB#0:
    392 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
    393 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
    394 ; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    395 ; SSE41-NEXT:    retq
    396 ;
    397 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
    398 ; AVX:       # BB#0:
    399 ; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
    400 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
    401 ; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    402 ; AVX-NEXT:    retq
    403   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
    404   ret <16 x i8> %shuffle
    405 }
    406 
    407 define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) {
        ; Per-byte blend with no lane movement: even lanes keep the byte of %a at
        ; that position, odd lanes take the byte of %b at that position
        ; (index 16 + lane).
    408 ; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    409 ; SSE2:       # BB#0:
    410 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    411 ; SSE2-NEXT:    andps %xmm2, %xmm0
    412 ; SSE2-NEXT:    andnps %xmm1, %xmm2
    413 ; SSE2-NEXT:    orps %xmm2, %xmm0
    414 ; SSE2-NEXT:    retq
    415 ;
    416 ; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    417 ; SSSE3:       # BB#0:
    418 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
    419 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    420 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    421 ; SSSE3-NEXT:    retq
    422 ;
    423 ; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    424 ; SSE41:       # BB#0:
    425 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    426 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    427 ; SSE41-NEXT:    pblendvb %xmm2, %xmm1
    428 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    429 ; SSE41-NEXT:    retq
    430 ;
    431 ; AVX-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
    432 ; AVX:       # BB#0:
    433 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
    434 ; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
    435 ; AVX-NEXT:    retq
    436   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    437   ret <16 x i8> %shuffle
    438 }
    439 
    440 define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) {
        ; Per-byte blend with no lane movement: lanes 3, 7, 11 and 15 take the
        ; byte of %b at the same position (indices 19, 23, 27, 31); every other
        ; lane keeps the byte of %a.
    441 ; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    442 ; SSE2:       # BB#0:
    443 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
    444 ; SSE2-NEXT:    andps %xmm2, %xmm0
    445 ; SSE2-NEXT:    andnps %xmm1, %xmm2
    446 ; SSE2-NEXT:    orps %xmm2, %xmm0
    447 ; SSE2-NEXT:    retq
    448 ;
    449 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    450 ; SSSE3:       # BB#0:
    451 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
    452 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
    453 ; SSSE3-NEXT:    por %xmm1, %xmm0
    454 ; SSSE3-NEXT:    retq
    455 ;
    456 ; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    457 ; SSE41:       # BB#0:
    458 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    459 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
    460 ; SSE41-NEXT:    pblendvb %xmm2, %xmm1
    461 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    462 ; SSE41-NEXT:    retq
    463 ;
    464 ; AVX-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
    465 ; AVX:       # BB#0:
    466 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
    467 ; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
    468 ; AVX-NEXT:    retq
    469   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
    470   ret <16 x i8> %shuffle
    471 }
    472 
    473 define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) {
        ; Takes a single argument; the second shuffle operand is zeroinitializer.
        ; Lanes 3, 7, 11 and 15 use indices 19, 23, 27 and 31, which select from
        ; that zero vector, so those bytes become zero ("zz" in the name); every
        ; other lane keeps the byte of %a at the same position.
    474 ; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
    475 ; SSE:       # BB#0:
    476 ; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
    477 ; SSE-NEXT:    retq
    478 ;
    479 ; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
    480 ; AVX:       # BB#0:
    481 ; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
    482 ; AVX-NEXT:    retq
    483   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
    484   ret <16 x i8> %shuffle
    485 }
    486 
    487 define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
        ; Per-byte blend with no lane movement: lanes 4, 7, 12 and 15 take the
        ; byte of %b at the same position (indices 20, 23, 28, 31); every other
        ; lane keeps the byte of %a.
    488 ; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    489 ; SSE2:       # BB#0:
    490 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
    491 ; SSE2-NEXT:    andps %xmm2, %xmm0
    492 ; SSE2-NEXT:    andnps %xmm1, %xmm2
    493 ; SSE2-NEXT:    orps %xmm2, %xmm0
    494 ; SSE2-NEXT:    retq
    495 ;
    496 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    497 ; SSSE3:       # BB#0:
    498 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
    499 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
    500 ; SSSE3-NEXT:    por %xmm1, %xmm0
    501 ; SSSE3-NEXT:    retq
    502 ;
    503 ; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    504 ; SSE41:       # BB#0:
    505 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    506 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
    507 ; SSE41-NEXT:    pblendvb %xmm2, %xmm1
    508 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    509 ; SSE41-NEXT:    retq
    510 ;
    511 ; AVX-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
    512 ; AVX:       # BB#0:
    513 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
    514 ; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
    515 ; AVX-NEXT:    retq
    516   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
    517   ret <16 x i8> %shuffle
    518 }
    519 
    520 define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) {
        ; Per-byte blend with no lane movement: lanes 0-3, 8, 9, 12 and 14 take
        ; the byte of %b at the same position (indices 16-19, 24, 25, 28, 30);
        ; lanes 4-7, 10, 11, 13 and 15 keep the byte of %a.
    521 ; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    522 ; SSE2:       # BB#0:
    523 ; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
    524 ; SSE2-NEXT:    andps %xmm2, %xmm1
    525 ; SSE2-NEXT:    andnps %xmm0, %xmm2
    526 ; SSE2-NEXT:    orps %xmm1, %xmm2
    527 ; SSE2-NEXT:    movaps %xmm2, %xmm0
    528 ; SSE2-NEXT:    retq
    529 ;
    530 ; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    531 ; SSSE3:       # BB#0:
    532 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
    533 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
    534 ; SSSE3-NEXT:    por %xmm1, %xmm0
    535 ; SSSE3-NEXT:    retq
    536 ;
    537 ; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    538 ; SSE41:       # BB#0:
    539 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    540 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
    541 ; SSE41-NEXT:    pblendvb %xmm1, %xmm2
    542 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    543 ; SSE41-NEXT:    retq
    544 ;
    545 ; AVX-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
    546 ; AVX:       # BB#0:
    547 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
    548 ; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
    549 ; AVX-NEXT:    retq
    550   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
    551   ret <16 x i8> %shuffle
    552 }
    553 
    554 define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
        ; Gathers bytes 0, 4, 8 and 12 of %a into result lanes 0-3; lanes 4-15
        ; are undef. With the little-endian data layout declared at the top of
        ; the file, those are the low bytes of each 32-bit group, i.e. the byte
        ; pattern of truncating a <4 x i32> to <4 x i8>.
    555 ; SSE2-LABEL: trunc_v4i32_shuffle:
    556 ; SSE2:       # BB#0:
    557 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    558 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    559 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    560 ; SSE2-NEXT:    retq
    561 ;
    562 ; SSSE3-LABEL: trunc_v4i32_shuffle:
    563 ; SSSE3:       # BB#0:
    564 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
    565 ; SSSE3-NEXT:    retq
    566 ;
    567 ; SSE41-LABEL: trunc_v4i32_shuffle:
    568 ; SSE41:       # BB#0:
    569 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
    570 ; SSE41-NEXT:    retq
    571 ;
    572 ; AVX-LABEL: trunc_v4i32_shuffle:
    573 ; AVX:       # BB#0:
    574 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
    575 ; AVX-NEXT:    retq
    576   %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    577   ret <16 x i8> %shuffle
    578 }
    579 
    580 define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
    581 ; We don't have anything useful to check here. This generates 100s of
    582 ; instructions. Instead, just make sure we survived codegen.
        ;
        ; The body is a chain of ten dependent shufflevectors with irregular
        ; masks. Arguments %s.0.1, %s.0.2 and %s.0.7 are never referenced; the
        ; only assertion is that a function label and a final return are emitted.
    583 ; ALL-LABEL: stress_test0:
    584 ; ALL:         retq
    585 entry:
    586   %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
    587   %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
    588   %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8>
    589   %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29>
    590   %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29>
    591   %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17>
    592   %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23>
    593   %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17>
    594   %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 undef, i32 20, i32 undef, i32 3, i32 27, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
    595   %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 undef, i32 undef, i32 31, i32 1, i32 undef, i32 10>
    596   ret <16 x i8> %s.16.0
    597 }
    598 
    599 define <16 x i8> @undef_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
    600 ; There is nothing interesting to check about these instructions other than
    601 ; that they survive codegen. However, we actually do better and delete all of
    602 ; them because the result is 'undef'.
    603 ;
        ; Why the result is undef: %s.10.4 defines only lane 1 (every other mask
        ; entry is undef), and the final shuffle %s.12.4 reads only lane 13 of
        ; %s.10.4, so no defined byte reaches the returned vector. The checks
        ; below therefore expect the entry block to contain nothing but the
        ; return.
    604 ; ALL-LABEL: undef_test1:
    605 ; ALL:       # BB#0: # %entry
    606 ; ALL-NEXT:    retq
    607 entry:
    608   %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0>
    609   %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22>
    610   %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> undef, <16 x i32> <i32 3, i32 8, i32 undef, i32 7, i32 undef, i32 10, i32 8, i32 0, i32 15, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 9>
    611   %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> undef, <16 x i32> <i32 7, i32 undef, i32 14, i32 7, i32 8, i32 undef, i32 7, i32 8, i32 5, i32 15, i32 undef, i32 1, i32 11, i32 undef, i32 undef, i32 11>
    612   %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29>
    613   %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 undef>
    614   %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10>
    615   %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 10, i32 undef, i32 0, i32 5, i32 undef, i32 9, i32 undef>
    616   %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    617   %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 28, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    618   %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 5>
    619   %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    620   %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef>
    621 
    622   ret <16 x i8> %s.12.4
    623 }
    624 
    625 define <16 x i8> @PR20540(<8 x i8> %a) {
        ; Widening shuffle: the two operands are <8 x i8> but the mask has 16
        ; entries, so the result is <16 x i8>. Lanes 0-7 copy %a; lanes 8-15 all
        ; use index 8, which is element 0 of the zeroinitializer operand, so the
        ; upper half is zero.
        ; NOTE(review): the name presumably refers to LLVM bug PR20540 - not
        ; verifiable from this file.
    626 ; SSE2-LABEL: PR20540:
    627 ; SSE2:       # BB#0:
    628 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    629 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    630 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    631 ; SSE2-NEXT:    retq
    632 ;
    633 ; SSSE3-LABEL: PR20540:
    634 ; SSSE3:       # BB#0:
    635 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
    636 ; SSSE3-NEXT:    retq
    637 ;
    638 ; SSE41-LABEL: PR20540:
    639 ; SSE41:       # BB#0:
    640 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
    641 ; SSE41-NEXT:    retq
    642 ;
    643 ; AVX-LABEL: PR20540:
    644 ; AVX:       # BB#0:
    645 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
    646 ; AVX-NEXT:    retq
    647   %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
    648   ret <16 x i8> %shuffle
    649 }
    650 
    651 define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
        ; Scalar %i is inserted into lane 0 of an otherwise-undef vector %a; the
        ; shuffle then takes lane 0 from %a (mask index 16) and lanes 1-15 from
        ; the zeroinitializer operand. Result: %i in byte 0, zeros elsewhere.
    652 ; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    653 ; SSE:       # BB#0:
    654 ; SSE-NEXT:    movzbl %dil, %eax
    655 ; SSE-NEXT:    movd %eax, %xmm0
    656 ; SSE-NEXT:    retq
    657 ;
    658 ; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    659 ; AVX:       # BB#0:
    660 ; AVX-NEXT:    movzbl %dil, %eax
    661 ; AVX-NEXT:    vmovd %eax, %xmm0
    662 ; AVX-NEXT:    retq
    663   %a = insertelement <16 x i8> undef, i8 %i, i32 0
    664   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    665   ret <16 x i8> %shuffle
    666 }
    667 
    668 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
        ; Places scalar %i (lane 0 of %a, mask index 16) into result lane 5;
        ; every other lane uses index 0 of the zeroinitializer operand and is
        ; therefore zero. Lane 5 is the high byte of 16-bit word 2, which is why
        ; the word-insert sequences below shift %edi left by 8 first.
    669 ; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    670 ; SSE2:       # BB#0:
    671 ; SSE2-NEXT:    shll $8, %edi
    672 ; SSE2-NEXT:    pxor %xmm0, %xmm0
    673 ; SSE2-NEXT:    pinsrw $2, %edi, %xmm0
    674 ; SSE2-NEXT:    retq
    675 ;
    676 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    677 ; SSSE3:       # BB#0:
    678 ; SSSE3-NEXT:    shll $8, %edi
    679 ; SSSE3-NEXT:    pxor %xmm0, %xmm0
    680 ; SSSE3-NEXT:    pinsrw $2, %edi, %xmm0
    681 ; SSSE3-NEXT:    retq
    682 ;
    683 ; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    684 ; SSE41:       # BB#0:
    685 ; SSE41-NEXT:    pxor %xmm0, %xmm0
    686 ; SSE41-NEXT:    pinsrb $5, %edi, %xmm0
    687 ; SSE41-NEXT:    retq
    688 ;
    689 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    690 ; AVX:       # BB#0:
    691 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    692 ; AVX-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
    693 ; AVX-NEXT:    retq
    694   %a = insertelement <16 x i8> undef, i8 %i, i32 0
    695   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    696   ret <16 x i8> %shuffle
    697 }
    698 
    699 define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
        ; Places scalar %i (lane 0 of %a, mask index 16) into result lane 15.
        ; Lanes 1, 2, 4 and 5 are undef; all remaining lanes select from the
        ; zeroinitializer operand. Lane 15 is the high byte of 16-bit word 7,
        ; hence the shift-by-8 before the word insert in the sequences below.
    700 ; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    701 ; SSE2:       # BB#0:
    702 ; SSE2-NEXT:    shll $8, %edi
    703 ; SSE2-NEXT:    pxor %xmm0, %xmm0
    704 ; SSE2-NEXT:    pinsrw $7, %edi, %xmm0
    705 ; SSE2-NEXT:    retq
    706 ;
    707 ; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    708 ; SSSE3:       # BB#0:
    709 ; SSSE3-NEXT:    shll $8, %edi
    710 ; SSSE3-NEXT:    pxor %xmm0, %xmm0
    711 ; SSSE3-NEXT:    pinsrw $7, %edi, %xmm0
    712 ; SSSE3-NEXT:    retq
    713 ;
    714 ; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    715 ; SSE41:       # BB#0:
    716 ; SSE41-NEXT:    pxor %xmm0, %xmm0
    717 ; SSE41-NEXT:    pinsrb $15, %edi, %xmm0
    718 ; SSE41-NEXT:    retq
    719 ;
    720 ; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
    721 ; AVX:       # BB#0:
    722 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    723 ; AVX-NEXT:    vpinsrb $15, %edi, %xmm0, %xmm0
    724 ; AVX-NEXT:    retq
    725   %a = insertelement <16 x i8> undef, i8 %i, i32 0
    726   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
    727   ret <16 x i8> %shuffle
    728 }
    729 
    730 define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
        ; Here %i is inserted into lane 3 of %a (not lane 0), and mask index 19
        ; (= 16 + 3) moves it to result lane 2. All other lanes come from the
        ; zeroinitializer operand. Lane 2 is the low byte of 16-bit word 1, so
        ; the word-insert sequences zero-extend %dil instead of shifting.
    731 ; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    732 ; SSE2:       # BB#0:
    733 ; SSE2-NEXT:    movzbl %dil, %eax
    734 ; SSE2-NEXT:    pxor %xmm0, %xmm0
    735 ; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
    736 ; SSE2-NEXT:    retq
    737 ;
    738 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    739 ; SSSE3:       # BB#0:
    740 ; SSSE3-NEXT:    movzbl %dil, %eax
    741 ; SSSE3-NEXT:    pxor %xmm0, %xmm0
    742 ; SSSE3-NEXT:    pinsrw $1, %eax, %xmm0
    743 ; SSSE3-NEXT:    retq
    744 ;
    745 ; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    746 ; SSE41:       # BB#0:
    747 ; SSE41-NEXT:    pxor %xmm0, %xmm0
    748 ; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
    749 ; SSE41-NEXT:    retq
    750 ;
    751 ; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    752 ; AVX:       # BB#0:
    753 ; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    754 ; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
    755 ; AVX-NEXT:    retq
    756   %a = insertelement <16 x i8> undef, i8 %i, i32 3
    757   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    758   ret <16 x i8> %shuffle
    759 }
    760 
    761 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) {
        ; Lanes 0-11 are zero (index 0 of the zeroinitializer operand); lane 12
        ; is %a[0] (index 16), lane 14 is %a[2] (index 18), and lanes 13 and 15
        ; are undef. The undef lanes leave room to fill them with %a[1] and
        ; %a[3], so the expected code is a single whole-register byte shift
        ; left by 12.
    762 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
    763 ; SSE:       # BB#0:
    764 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
    765 ; SSE-NEXT:    retq
    766 ;
    767 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
    768 ; AVX:       # BB#0:
    769 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
    770 ; AVX-NEXT:    retq
    771   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef>
    772   ret <16 x i8> %shuffle
    773 }
    774 
    775 define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
        ; Lanes 0, 2 and 3 are %a[12], %a[14] and %a[15] (mask indices 28, 30,
        ; 31); lane 1 is undef; lanes 4-15 are zero (index 0 of the
        ; zeroinitializer operand). The undef lane leaves room to fill it with
        ; %a[13], so the expected code is a single whole-register byte shift
        ; right by 12.
    776 ; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    777 ; SSE:       # BB#0:
    778 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    779 ; SSE-NEXT:    retq
    780 ;
    781 ; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
    782 ; AVX:       # BB#0:
    783 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    784 ; AVX-NEXT:    retq
    785   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    786   ret <16 x i8> %shuffle
    787 }
    788 
    789 define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte rotation: lane 0 is %b[15] (mask index 31) and lanes
        ; 1-15 are %a[0..14]. The expected code is either a pair of byte shifts
        ; combined with an OR, or a single byte-align instruction.
    790 ; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    791 ; SSE2:       # BB#0:
    792 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    793 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    794 ; SSE2-NEXT:    por %xmm1, %xmm0
    795 ; SSE2-NEXT:    retq
    796 ;
    797 ; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    798 ; SSSE3:       # BB#0:
    799 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    800 ; SSSE3-NEXT:    retq
    801 ;
    802 ; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    803 ; SSE41:       # BB#0:
    804 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    805 ; SSE41-NEXT:    retq
    806 ;
    807 ; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    808 ; AVX:       # BB#0:
    809 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    810 ; AVX-NEXT:    retq
    811   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
    812   ret <16 x i8> %shuffle
    813 }
    814 
    815 define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
        ; Single-input byte rotation: lane 0 is %a[15] and lanes 1-15 are
        ; %a[0..14]. No mask index is 16 or above, so %b is never read even
        ; though it is passed as the second shuffle operand.
    816 ; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    817 ; SSE2:       # BB#0:
    818 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    819 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    820 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    821 ; SSE2-NEXT:    por %xmm1, %xmm0
    822 ; SSE2-NEXT:    retq
    823 ;
    824 ; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    825 ; SSSE3:       # BB#0:
    826 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    827 ; SSSE3-NEXT:    retq
    828 ;
    829 ; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    830 ; SSE41:       # BB#0:
    831 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    832 ; SSE41-NEXT:    retq
    833 ;
    834 ; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
    835 ; AVX:       # BB#0:
    836 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    837 ; AVX-NEXT:    retq
    838   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
    839   ret <16 x i8> %shuffle
    840 }
    841 
    842 define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte rotation: lanes 0-14 are %b[1..15] (mask indices
        ; 17-31) and lane 15 is %a[0].
    843 ; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    844 ; SSE2:       # BB#0:
    845 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    846 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
    847 ; SSE2-NEXT:    por %xmm1, %xmm0
    848 ; SSE2-NEXT:    retq
    849 ;
    850 ; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    851 ; SSSE3:       # BB#0:
    852 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
    853 ; SSSE3-NEXT:    retq
    854 ;
    855 ; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    856 ; SSE41:       # BB#0:
    857 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
    858 ; SSE41-NEXT:    retq
    859 ;
    860 ; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
    861 ; AVX:       # BB#0:
    862 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
    863 ; AVX-NEXT:    retq
    864   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
    865   ret <16 x i8> %shuffle
    866 }
    867 
    868 define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte rotation: lanes 0-14 are %a[1..15] and lane 15 is
        ; %b[0] (mask index 16). In the non-VEX byte-align sequences the result
        ; is produced in xmm1, so an extra register copy back to xmm0 is
        ; expected.
    869 ; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    870 ; SSE2:       # BB#0:
    871 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    872 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
    873 ; SSE2-NEXT:    por %xmm1, %xmm0
    874 ; SSE2-NEXT:    retq
    875 ;
    876 ; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    877 ; SSSE3:       # BB#0:
    878 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
    879 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    880 ; SSSE3-NEXT:    retq
    881 ;
    882 ; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    883 ; SSE41:       # BB#0:
    884 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
    885 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    886 ; SSE41-NEXT:    retq
    887 ;
    888 ; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
    889 ; AVX:       # BB#0:
    890 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
    891 ; AVX-NEXT:    retq
    892   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
    893   ret <16 x i8> %shuffle
    894 }
    895 
    896 define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {
        ; Single-input byte rotation: lanes 0-14 are %a[1..15] and lane 15 is
        ; %a[0]. No mask index is 16 or above, so %b is never read even though
        ; it is passed as the second shuffle operand.
    897 ; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    898 ; SSE2:       # BB#0:
    899 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    900 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    901 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
    902 ; SSE2-NEXT:    por %xmm1, %xmm0
    903 ; SSE2-NEXT:    retq
    904 ;
    905 ; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    906 ; SSSE3:       # BB#0:
    907 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
    908 ; SSSE3-NEXT:    retq
    909 ;
    910 ; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    911 ; SSE41:       # BB#0:
    912 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
    913 ; SSE41-NEXT:    retq
    914 ;
    915 ; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
    916 ; AVX:       # BB#0:
    917 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
    918 ; AVX-NEXT:    retq
    919   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
    920   ret <16 x i8> %shuffle
    921 }
    922 
    923 define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {
        ; Two-input byte rotation: lane 0 is %a[15] and lanes 1-15 are
        ; %b[0..14] (mask indices 16-30). In the non-VEX byte-align sequences
        ; the result is produced in xmm1, so an extra register copy back to
        ; xmm0 is expected.
    924 ; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    925 ; SSE2:       # BB#0:
    926 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    927 ; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    928 ; SSE2-NEXT:    por %xmm1, %xmm0
    929 ; SSE2-NEXT:    retq
    930 ;
    931 ; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    932 ; SSSE3:       # BB#0:
    933 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    934 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    935 ; SSSE3-NEXT:    retq
    936 ;
    937 ; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    938 ; SSE41:       # BB#0:
    939 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    940 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    941 ; SSE41-NEXT:    retq
    942 ;
    943 ; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
    944 ; AVX:       # BB#0:
    945 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    946 ; AVX-NEXT:    retq
    947   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
    948   ret <16 x i8> %shuffle
    949 }
    950 
    951 define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
        ; Spreads the two low bytes of %a to the bottom of each 64-bit half:
        ; lane 0 is %a[0], lane 8 is %a[1], and every other lane is undef. The
        ; zeroinitializer operand is never referenced by the mask, so the
        ; compiler is free to fill the undef lanes with anything (duplicates of
        ; the source bytes or zeros, as the sequences below show).
    952 ; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
    953 ; SSE2:       # BB#0:
    954 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    955 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    956 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
    957 ; SSE2-NEXT:    retq
    958 ;
    959 ; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
    960 ; SSSE3:       # BB#0:
    961 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    962 ; SSSE3-NEXT:    retq
    963 ;
    964 ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
    965 ; SSE41:       # BB#0:
    966 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    967 ; SSE41-NEXT:    retq
    968 ;
    969 ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
    970 ; AVX:       # BB#0:
    971 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    972 ; AVX-NEXT:    retq
    973   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    974   ret <16 x i8> %shuffle
    975 }
    976 
    977 define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
        ; Zero-extends the two low bytes of %a to 64-bit lanes: lane 0 is
        ; %a[0], lane 8 is %a[1], and every other lane selects from the
        ; zeroinitializer operand (mask indices 17-23 and 25-31). Unlike the
        ; undef variant of this pattern, the filler lanes must really be zero,
        ; so the unpack-based sequence interleaves with a zeroed register.
    978 ; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
    979 ; SSE2:       # BB#0:
    980 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    981 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    982 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    983 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    984 ; SSE2-NEXT:    retq
    985 ;
    986 ; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
    987 ; SSSE3:       # BB#0:
    988 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    989 ; SSSE3-NEXT:    retq
    990 ;
    991 ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
    992 ; SSE41:       # BB#0:
    993 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    994 ; SSE41-NEXT:    retq
    995 ;
    996 ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
    997 ; AVX:       # BB#0:
    998 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    999 ; AVX-NEXT:    retq
   1000   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   1001   ret <16 x i8> %shuffle
   1002 }
   1003 
   1004 define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) {
        ; Spreads the four low bytes of %a to the bottom of each 32-bit group:
        ; lanes 0, 4, 8 and 12 are %a[0..3] and every other lane is undef. The
        ; zeroinitializer operand is never referenced by the mask, so the undef
        ; lanes may be filled with duplicated source bytes or with zeros.
   1005 ; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1006 ; SSE2:       # BB#0:
   1007 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1008 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   1009 ; SSE2-NEXT:    retq
   1010 ;
   1011 ; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1012 ; SSSE3:       # BB#0:
   1013 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1014 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
   1015 ; SSSE3-NEXT:    retq
   1016 ;
   1017 ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1018 ; SSE41:       # BB#0:
   1019 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1020 ; SSE41-NEXT:    retq
   1021 ;
   1022 ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
   1023 ; AVX:       # BB#0:
   1024 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1025 ; AVX-NEXT:    retq
   1026   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
   1027   ret <16 x i8> %shuffle
   1028 }
   1029 
   1030 define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) {
        ; Zero-extends the four low bytes of %a to 32-bit lanes: lanes 0, 4, 8
        ; and 12 are %a[0..3] and every other lane selects from the
        ; zeroinitializer operand (mask indices 17-19, 21-23, 25-27, 29-31).
        ; Unlike the undef variant of this pattern, the filler lanes must really
        ; be zero, so the unpack-based sequences interleave with a zeroed
        ; register.
   1031 ; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1032 ; SSE2:       # BB#0:
   1033 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1034 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1035 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1036 ; SSE2-NEXT:    retq
   1037 ;
   1038 ; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1039 ; SSSE3:       # BB#0:
   1040 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1041 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1042 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1043 ; SSSE3-NEXT:    retq
   1044 ;
   1045 ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1046 ; SSE41:       # BB#0:
   1047 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1048 ; SSE41-NEXT:    retq
   1049 ;
   1050 ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
   1051 ; AVX:       # BB#0:
   1052 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1053 ; AVX-NEXT:    retq
   1054   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
   1055   ret <16 x i8> %shuffle
   1056 }
   1057 
   1058 define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) {
        ; Places bytes 0-7 of %a in the even result bytes; the odd result bytes
        ; are undef. The checks expect a single self-unpack (punpcklbw) on the
        ; runs without pmovzxbw and a single (v)pmovzxbw on the runs that have it.
   1059 ; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1060 ; SSE2:       # BB#0:
   1061 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1062 ; SSE2-NEXT:    retq
   1063 ;
   1064 ; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1065 ; SSSE3:       # BB#0:
   1066 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1067 ; SSSE3-NEXT:    retq
   1068 ;
   1069 ; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1070 ; SSE41:       # BB#0:
   1071 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1072 ; SSE41-NEXT:    retq
   1073 ;
   1074 ; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
   1075 ; AVX:       # BB#0:
   1076 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1077 ; AVX-NEXT:    retq
   1078   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
   1079   ret <16 x i8> %shuffle
   1080 }
   1081 
   1082 define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) {
        ; Zero-extends bytes 0-7 of %a to 16 bits each: the odd mask indices
        ; (17, 19, ...) pick bytes from the zeroinitializer operand. Runs without
        ; pmovzxbw are expected to unpack against a pxor-zeroed register; the
        ; other runs expect a single (v)pmovzxbw.
   1083 ; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1084 ; SSE2:       # BB#0:
   1085 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1086 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1087 ; SSE2-NEXT:    retq
   1088 ;
   1089 ; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1090 ; SSSE3:       # BB#0:
   1091 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1092 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1093 ; SSSE3-NEXT:    retq
   1094 ;
   1095 ; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1096 ; SSE41:       # BB#0:
   1097 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1098 ; SSE41-NEXT:    retq
   1099 ;
   1100 ; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
   1101 ; AVX:       # BB#0:
   1102 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1103 ; AVX-NEXT:    retq
   1104   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
   1105   ret <16 x i8> %shuffle
   1106 }
   1107 
   1108 define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) {
        ; Irregular two-input shuffle with one undef lane (result byte 0).
        ; Result bytes 4, 8 and 12 come from %b (mask indices 22, 18, 18); all
        ; other defined bytes come from %a. Runs that have pshufb are expected
        ; to shuffle each input once and OR the halves together; the SSE2 run
        ; has no byte shuffle and is expected to emit the longer
        ; unpack / word-shuffle / mask / pack sequence listed below.
   1109 ; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1110 ; SSE2:       # BB#0: # %entry
   1111 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1112 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
   1113 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
   1114 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,3,0,1]
   1115 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[0,1,2,2,4,5,6,7]
   1116 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,7,7]
   1117 ; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535]
   1118 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   1119 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,3]
   1120 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,3,1,3,4,5,6,7]
   1121 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,4]
   1122 ; SSE2-NEXT:    pand %xmm5, %xmm2
   1123 ; SSE2-NEXT:    pandn %xmm4, %xmm5
   1124 ; SSE2-NEXT:    por %xmm2, %xmm5
   1125 ; SSE2-NEXT:    psrlq $16, %xmm3
   1126 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
   1127 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,1,3]
   1128 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
   1129 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4]
   1130 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
   1131 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
   1132 ; SSE2-NEXT:    packuswb %xmm5, %xmm2
   1133 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
   1134 ; SSE2-NEXT:    pand %xmm0, %xmm2
   1135 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
   1136 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,3,3,4,5,6,7]
   1137 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,7]
   1138 ; SSE2-NEXT:    pandn %xmm1, %xmm0
   1139 ; SSE2-NEXT:    por %xmm2, %xmm0
   1140 ; SSE2-NEXT:    retq
   1141 ;
   1142 ; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1143 ; SSSE3:       # BB#0: # %entry
   1144 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
   1145 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
   1146 ; SSSE3-NEXT:    por %xmm1, %xmm0
   1147 ; SSSE3-NEXT:    retq
   1148 ;
   1149 ; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1150 ; SSE41:       # BB#0: # %entry
   1151 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
   1152 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
   1153 ; SSE41-NEXT:    por %xmm1, %xmm0
   1154 ; SSE41-NEXT:    retq
   1155 ;
   1156 ; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
   1157 ; AVX:       # BB#0: # %entry
   1158 ; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
   1159 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
   1160 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
   1161 ; AVX-NEXT:    retq
   1162 entry:
   1163   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>
   1164 
   1165   ret <16 x i8> %shuffle
   1166 }
   1167 
   1168 define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
   1169 ; Nothing interesting to test here. Just make sure we didn't crash.
        ; Two irregular two-input shuffles feed a third one. No instruction
        ; sequence is pinned: the only expectation, shared by every RUN line,
        ; is that the function is emitted and ends in a retq.
   1170 ; ALL-LABEL: stress_test2:
   1171 ; ALL:         retq
   1172 entry:
   1173   %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
   1174   %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
   1175   %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 undef, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19>
   1176 
   1177   ret <16 x i8> %s.2.0
   1178 }
   1179 
   1180 define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) {
        ; Shuffles a partly-undef constant vector with a bitcast zero vector.
        ; Every selected byte is a zero (indices 12-15 of the first operand are
        ; i8 0, indices 16-27 come from the zero operand), so the checks expect
        ; the shuffle to fold away: one zeroed register stored to both pointers.
   1181 ; SSE-LABEL: constant_gets_selected:
   1182 ; SSE:       # BB#0: # %entry
   1183 ; SSE-NEXT:    xorps %xmm0, %xmm0
   1184 ; SSE-NEXT:    movaps %xmm0, (%rdi)
   1185 ; SSE-NEXT:    movaps %xmm0, (%rsi)
   1186 ; SSE-NEXT:    retq
   1187 ;
   1188 ; AVX-LABEL: constant_gets_selected:
   1189 ; AVX:       # BB#0: # %entry
   1190 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1191 ; AVX-NEXT:    vmovaps %xmm0, (%rdi)
   1192 ; AVX-NEXT:    vmovaps %xmm0, (%rsi)
   1193 ; AVX-NEXT:    retq
   1194 entry:
   1195   %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
   1196   %shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
   1197   %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32>
   1198   store <4 x i32> %weirder_zero, <4 x i32>* %ptr1, align 16
   1199   store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16
   1200   ret void
   1201 }
   1202 
   1203 ;
   1204 ; Shuffle to logical bit shifts
   1205 ;
   1206 
   1207 define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
        ; In every 16-bit lane the low byte becomes zero (index 16 picks from the
        ; zero operand) and the high byte takes the lane's original low byte.
        ; Expected lowering is a 16-bit left shift by 8. %b is unused.
   1208 ; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1209 ; SSE:       # BB#0:
   1210 ; SSE-NEXT:    psllw $8, %xmm0
   1211 ; SSE-NEXT:    retq
   1212 ;
   1213 ; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
   1214 ; AVX:       # BB#0:
   1215 ; AVX-NEXT:    vpsllw $8, %xmm0, %xmm0
   1216 ; AVX-NEXT:    retq
   1217   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
   1218   ret <16 x i8> %shuffle
   1219 }
   1220 
   1221 define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
        ; In every 32-bit lane the low three bytes become zero and the top byte
        ; takes the lane's original low byte. Expected lowering is a 32-bit left
        ; shift by 24. %b is unused.
   1222 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1223 ; SSE:       # BB#0:
   1224 ; SSE-NEXT:    pslld $24, %xmm0
   1225 ; SSE-NEXT:    retq
   1226 ;
   1227 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
   1228 ; AVX:       # BB#0:
   1229 ; AVX-NEXT:    vpslld $24, %xmm0, %xmm0
   1230 ; AVX-NEXT:    retq
   1231   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
   1232   ret <16 x i8> %shuffle
   1233 }
   1234 
   1235 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
        ; In each 64-bit half the low seven bytes become zero and the top byte
        ; takes the half's original low byte. Expected lowering is a 64-bit left
        ; shift by 56. %b is unused.
   1236 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
   1237 ; SSE:       # BB#0:
   1238 ; SSE-NEXT:    psllq $56, %xmm0
   1239 ; SSE-NEXT:    retq
   1240 ;
   1241 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
   1242 ; AVX:       # BB#0:
   1243 ; AVX-NEXT:    vpsllq $56, %xmm0, %xmm0
   1244 ; AVX-NEXT:    retq
   1245   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
   1246   ret <16 x i8> %shuffle
   1247 }
   1248 
   1249 define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
        ; Each 64-bit half is moved up by one byte with a zero shifted in at the
        ; bottom; a few lanes are undef, which must not prevent the match.
        ; Expected lowering is a 64-bit left shift by 8. %b is unused.
   1250 ; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
   1251 ; SSE:       # BB#0:
   1252 ; SSE-NEXT:    psllq $8, %xmm0
   1253 ; SSE-NEXT:    retq
   1254 ;
   1255 ; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
   1256 ; AVX:       # BB#0:
   1257 ; AVX-NEXT:    vpsllq $8, %xmm0, %xmm0
   1258 ; AVX-NEXT:    retq
   1259   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
   1260   ret <16 x i8> %shuffle
   1261 }
   1262 
   1263 define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
        ; The defined lanes move the high byte of a 16-bit lane into its low byte
        ; and zero the high byte; the undef lanes are compatible with doing that
        ; in every lane. Expected lowering is a 16-bit logical right shift by 8.
        ; %b is unused.
   1264 ; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
   1265 ; SSE:       # BB#0:
   1266 ; SSE-NEXT:    psrlw $8, %xmm0
   1267 ; SSE-NEXT:    retq
   1268 ;
   1269 ; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
   1270 ; AVX:       # BB#0:
   1271 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
   1272 ; AVX-NEXT:    retq
   1273   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
   1274   ret <16 x i8> %shuffle
   1275 }
   1276 
   1277 define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
        ; The defined lanes move the upper two bytes of a 32-bit lane into its
        ; lower two bytes and zero the upper two; the undef lanes are compatible
        ; with doing that in every lane. Expected lowering is a 32-bit logical
        ; right shift by 16. %b is unused.
   1278 ; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
   1279 ; SSE:       # BB#0:
   1280 ; SSE-NEXT:    psrld $16, %xmm0
   1281 ; SSE-NEXT:    retq
   1282 ;
   1283 ; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
   1284 ; AVX:       # BB#0:
   1285 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
   1286 ; AVX-NEXT:    retq
   1287   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
   1288   ret <16 x i8> %shuffle
   1289 }
   1290 
   1291 define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
        ; The defined lanes move the top byte of each 64-bit half into its lowest
        ; byte and zero the rest; the undef lanes are compatible with that.
        ; Expected lowering is a 64-bit logical right shift by 56. %b is unused.
   1292 ; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
   1293 ; SSE:       # BB#0:
   1294 ; SSE-NEXT:    psrlq $56, %xmm0
   1295 ; SSE-NEXT:    retq
   1296 ;
   1297 ; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
   1298 ; AVX:       # BB#0:
   1299 ; AVX-NEXT:    vpsrlq $56, %xmm0, %xmm0
   1300 ; AVX-NEXT:    retq
   1301   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
   1302   ret <16 x i8> %shuffle
   1303 }
   1304 
   1305 define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
        ; Selects the even-numbered bytes of %inval1 followed by the
        ; even-numbered bytes of %inval2. The SSE2 run is expected to mask each
        ; 16-bit lane down to its low byte and pack both inputs with packuswb;
        ; runs that have pshufb are expected to shuffle each input with one
        ; shared mask and join the low halves with (v)punpcklqdq.
        ; NOTE(review): presumably a regression test for bug report PR12412;
        ; the report itself is not visible from this file.
   1306 ; SSE2-LABEL: PR12412:
   1307 ; SSE2:       # BB#0: # %entry
   1308 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
   1309 ; SSE2-NEXT:    pand %xmm2, %xmm1
   1310 ; SSE2-NEXT:    pand %xmm2, %xmm0
   1311 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
   1312 ; SSE2-NEXT:    retq
   1313 ;
   1314 ; SSSE3-LABEL: PR12412:
   1315 ; SSSE3:       # BB#0: # %entry
   1316 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
   1317 ; SSSE3-NEXT:    pshufb %xmm2, %xmm1
   1318 ; SSSE3-NEXT:    pshufb %xmm2, %xmm0
   1319 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1320 ; SSSE3-NEXT:    retq
   1321 ;
   1322 ; SSE41-LABEL: PR12412:
   1323 ; SSE41:       # BB#0: # %entry
   1324 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
   1325 ; SSE41-NEXT:    pshufb %xmm2, %xmm1
   1326 ; SSE41-NEXT:    pshufb %xmm2, %xmm0
   1327 ; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1328 ; SSE41-NEXT:    retq
   1329 ;
   1330 ; AVX-LABEL: PR12412:
   1331 ; AVX:       # BB#0: # %entry
   1332 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
   1333 ; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
   1334 ; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
   1335 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1336 ; AVX-NEXT:    retq
   1337 entry:
   1338   %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1339   ret <16 x i8> %0
   1340 }
   1341 
   1342 define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
        ; In every 32-bit lane bytes 2 and 3 move down one position and the top
        ; byte becomes zero; the lowest byte is undef, so it may receive the
        ; lane's original byte 1. Expected lowering is a 32-bit logical right
        ; shift by 8.
   1343 ; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
   1344 ; SSE:       # BB#0:
   1345 ; SSE-NEXT:    psrld $8, %xmm0
   1346 ; SSE-NEXT:    retq
   1347 ;
   1348 ; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
   1349 ; AVX:       # BB#0:
   1350 ; AVX-NEXT:    vpsrld $8, %xmm0, %xmm0
   1351 ; AVX-NEXT:    retq
   1352   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
   1353   ret <16 x i8> %shuffle
   1354 }
   1355 
   1356 define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
        ; Chains a byte shuffle, a 32-bit element reversal and a 16-bit pairwise
        ; swap through bitcasts. Composed, the three steps interleave the low
        ; eight bytes of %a and %b (a0,b0,a1,b1,...), so the checks expect the
        ; whole chain to collapse into a single (v)punpcklbw.
   1357 ; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
   1358 ; SSE:       # BB#0:
   1359 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1360 ; SSE-NEXT:    retq
   1361 ;
   1362 ; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
   1363 ; AVX:       # BB#0:
   1364 ; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1365 ; AVX-NEXT:    retq
   1366   %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
   1367   %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
   1368   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   1369   %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
   1370   %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   1371   %bitcast8  = bitcast <8 x i16> %shuffle16 to <16 x i8>
   1372   ret <16 x i8> %bitcast8
   1373 }
   1374 
   1375 define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
        ; Loads an i32, inserts it into element 0 of a zero <4 x i32>, then
        ; splats byte 0 of the bitcast result across all 16 lanes. The AVX2 run
        ; is expected to fold everything into one vpbroadcastb from memory; the
        ; other runs load with (v)movd and then splat in a register.
   1376 ; SSE2-LABEL: insert_dup_mem_v16i8_i32:
   1377 ; SSE2:       # BB#0:
   1378 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1379 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1380 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   1381 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1382 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1383 ; SSE2-NEXT:    retq
   1384 ;
   1385 ; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
   1386 ; SSSE3:       # BB#0:
   1387 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1388 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1389 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
   1390 ; SSSE3-NEXT:    retq
   1391 ;
   1392 ; SSE41-LABEL: insert_dup_mem_v16i8_i32:
   1393 ; SSE41:       # BB#0:
   1394 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1395 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   1396 ; SSE41-NEXT:    pshufb %xmm1, %xmm0
   1397 ; SSE41-NEXT:    retq
   1398 ;
   1399 ; AVX1-LABEL: insert_dup_mem_v16i8_i32:
   1400 ; AVX1:       # BB#0:
   1401 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1402 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1403 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   1404 ; AVX1-NEXT:    retq
   1405 ;
   1406 ; AVX2-LABEL: insert_dup_mem_v16i8_i32:
   1407 ; AVX2:       # BB#0:
   1408 ; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
   1409 ; AVX2-NEXT:    retq
   1410   %tmp = load i32, i32* %ptr, align 4
   1411   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   1412   %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
   1413   %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
   1414   ret <16 x i8> %tmp3
   1415 }
   1416 
   1417 define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
        ; Loads an i8, sign-extends it to i32, inserts it into element 0 of a
        ; zero <4 x i32>, then splats byte 0 of the bitcast result. The AVX2 run
        ; is expected to fold the load into one vpbroadcastb from memory; the
        ; other runs keep the movsbl load and splat in a register.
   1418 ; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
   1419 ; SSE2:       # BB#0:
   1420 ; SSE2-NEXT:    movsbl (%rdi), %eax
   1421 ; SSE2-NEXT:    movd %eax, %xmm0
   1422 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1423 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   1424 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
   1425 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
   1426 ; SSE2-NEXT:    retq
   1427 ;
   1428 ; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
   1429 ; SSSE3:       # BB#0:
   1430 ; SSSE3-NEXT:    movsbl (%rdi), %eax
   1431 ; SSSE3-NEXT:    movd %eax, %xmm0
   1432 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1433 ; SSSE3-NEXT:    pshufb %xmm1, %xmm0
   1434 ; SSSE3-NEXT:    retq
   1435 ;
   1436 ; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
   1437 ; SSE41:       # BB#0:
   1438 ; SSE41-NEXT:    movsbl (%rdi), %eax
   1439 ; SSE41-NEXT:    movd %eax, %xmm0
   1440 ; SSE41-NEXT:    pxor %xmm1, %xmm1
   1441 ; SSE41-NEXT:    pshufb %xmm1, %xmm0
   1442 ; SSE41-NEXT:    retq
   1443 ;
   1444 ; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
   1445 ; AVX1:       # BB#0:
   1446 ; AVX1-NEXT:    movsbl (%rdi), %eax
   1447 ; AVX1-NEXT:    vmovd %eax, %xmm0
   1448 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1449 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   1450 ; AVX1-NEXT:    retq
   1451 ;
   1452 ; AVX2-LABEL: insert_dup_mem_v16i8_sext_i8:
   1453 ; AVX2:       # BB#0:
   1454 ; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
   1455 ; AVX2-NEXT:    retq
   1456   %tmp = load i8, i8* %ptr, align 1
   1457   %tmp1 = sext i8 %tmp to i32
   1458   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   1459   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
   1460   %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer
   1461   ret <16 x i8> %tmp4
   1462 }
   1463 
   1464 define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
        ; Same shape as the byte-0 splat of a loaded i32, but splats byte 1 of
        ; the loaded value. The AVX2 run is expected to broadcast straight from
        ; memory at offset 1; runs that have pshufb use an all-ones index mask,
        ; and the SSE2 run uses unpack plus word shuffles.
   1465 ; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1466 ; SSE2:       # BB#0:
   1467 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1468 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1469 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   1470 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
   1471 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
   1472 ; SSE2-NEXT:    retq
   1473 ;
   1474 ; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1475 ; SSSE3:       # BB#0:
   1476 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1477 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1478 ; SSSE3-NEXT:    retq
   1479 ;
   1480 ; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1481 ; SSE41:       # BB#0:
   1482 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1483 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1484 ; SSE41-NEXT:    retq
   1485 ;
   1486 ; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1487 ; AVX1:       # BB#0:
   1488 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1489 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1490 ; AVX1-NEXT:    retq
   1491 ;
   1492 ; AVX2-LABEL: insert_dup_elt1_mem_v16i8_i32:
   1493 ; AVX2:       # BB#0:
   1494 ; AVX2-NEXT:    vpbroadcastb 1(%rdi), %xmm0
   1495 ; AVX2-NEXT:    retq
   1496   %tmp = load i32, i32* %ptr, align 4
   1497   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   1498   %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
   1499   %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   1500   ret <16 x i8> %tmp3
   1501 }
   1502 
   1503 define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
        ; Splats byte 2 of a loaded i32 across all 16 lanes. The AVX2 run is
        ; expected to broadcast straight from memory at offset 2; runs that have
        ; pshufb use an all-twos index mask, and the SSE2 run uses unpack plus
        ; word shuffles.
   1504 ; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1505 ; SSE2:       # BB#0:
   1506 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1507 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1508 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
   1509 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
   1510 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
   1511 ; SSE2-NEXT:    retq
   1512 ;
   1513 ; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1514 ; SSSE3:       # BB#0:
   1515 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1516 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1517 ; SSSE3-NEXT:    retq
   1518 ;
   1519 ; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1520 ; SSE41:       # BB#0:
   1521 ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1522 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1523 ; SSE41-NEXT:    retq
   1524 ;
   1525 ; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1526 ; AVX1:       # BB#0:
   1527 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   1528 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1529 ; AVX1-NEXT:    retq
   1530 ;
   1531 ; AVX2-LABEL: insert_dup_elt2_mem_v16i8_i32:
   1532 ; AVX2:       # BB#0:
   1533 ; AVX2-NEXT:    vpbroadcastb 2(%rdi), %xmm0
   1534 ; AVX2-NEXT:    retq
   1535   %tmp = load i32, i32* %ptr, align 4
   1536   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   1537   %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
   1538   %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
   1539   ret <16 x i8> %tmp3
   1540 }
   1541 
   1542 define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
        ; Splats byte 1 of an i8 load that was sign-extended to i32. That byte
        ; is produced by the extension rather than read from memory, so the AVX2
        ; run is not expected to broadcast from memory here: it keeps the movsbl,
        ; shifts right by 8 to bring byte 1 down, and broadcasts from a register.
   1543 ; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1544 ; SSE2:       # BB#0:
   1545 ; SSE2-NEXT:    movsbl (%rdi), %eax
   1546 ; SSE2-NEXT:    movd %eax, %xmm0
   1547 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1548 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
   1549 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
   1550 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
   1551 ; SSE2-NEXT:    retq
   1552 ;
   1553 ; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1554 ; SSSE3:       # BB#0:
   1555 ; SSSE3-NEXT:    movsbl (%rdi), %eax
   1556 ; SSSE3-NEXT:    movd %eax, %xmm0
   1557 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1558 ; SSSE3-NEXT:    retq
   1559 ;
   1560 ; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1561 ; SSE41:       # BB#0:
   1562 ; SSE41-NEXT:    movsbl (%rdi), %eax
   1563 ; SSE41-NEXT:    movd %eax, %xmm0
   1564 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1565 ; SSE41-NEXT:    retq
   1566 ;
   1567 ; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1568 ; AVX1:       # BB#0:
   1569 ; AVX1-NEXT:    movsbl (%rdi), %eax
   1570 ; AVX1-NEXT:    vmovd %eax, %xmm0
   1571 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1572 ; AVX1-NEXT:    retq
   1573 ;
   1574 ; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
   1575 ; AVX2:       # BB#0:
   1576 ; AVX2-NEXT:    movsbl (%rdi), %eax
   1577 ; AVX2-NEXT:    shrl $8, %eax
   1578 ; AVX2-NEXT:    vmovd %eax, %xmm0
   1579 ; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
   1580 ; AVX2-NEXT:    retq
   1581   %tmp = load i8, i8* %ptr, align 1
   1582   %tmp1 = sext i8 %tmp to i32
   1583   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   1584   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
   1585   %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   1586   ret <16 x i8> %tmp4
   1587 }
   1588 
   1589 define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
        ; Splats byte 2 of an i8 load that was sign-extended to i32. That byte
        ; is produced by the extension rather than read from memory, so the AVX2
        ; run keeps the movsbl, shifts right by 16 to bring byte 2 down, and
        ; broadcasts from a register instead of from memory.
   1590 ; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1591 ; SSE2:       # BB#0:
   1592 ; SSE2-NEXT:    movsbl (%rdi), %eax
   1593 ; SSE2-NEXT:    movd %eax, %xmm0
   1594 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1595 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
   1596 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
   1597 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
   1598 ; SSE2-NEXT:    retq
   1599 ;
   1600 ; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1601 ; SSSE3:       # BB#0:
   1602 ; SSSE3-NEXT:    movsbl (%rdi), %eax
   1603 ; SSSE3-NEXT:    movd %eax, %xmm0
   1604 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1605 ; SSSE3-NEXT:    retq
   1606 ;
   1607 ; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1608 ; SSE41:       # BB#0:
   1609 ; SSE41-NEXT:    movsbl (%rdi), %eax
   1610 ; SSE41-NEXT:    movd %eax, %xmm0
   1611 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1612 ; SSE41-NEXT:    retq
   1613 ;
   1614 ; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1615 ; AVX1:       # BB#0:
   1616 ; AVX1-NEXT:    movsbl (%rdi), %eax
   1617 ; AVX1-NEXT:    vmovd %eax, %xmm0
   1618 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
   1619 ; AVX1-NEXT:    retq
   1620 ;
   1621 ; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
   1622 ; AVX2:       # BB#0:
   1623 ; AVX2-NEXT:    movsbl (%rdi), %eax
   1624 ; AVX2-NEXT:    shrl $16, %eax
   1625 ; AVX2-NEXT:    vmovd %eax, %xmm0
   1626 ; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
   1627 ; AVX2-NEXT:    retq
   1628   %tmp = load i8, i8* %ptr, align 1
   1629   %tmp1 = sext i8 %tmp to i32
   1630   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
   1631   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
   1632   %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
   1633   ret <16 x i8> %tmp4
   1634 }
   1635