; Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512DQVL
     11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512BWVL
     12 ;
     13 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
     14 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2
     15 
     16 ;
     17 ; Variable Shifts
     18 ;
     19 
     20 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
     21 ; SSE2-LABEL: var_shift_v2i64:
     22 ; SSE2:       # %bb.0:
     23 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
     24 ; SSE2-NEXT:    psllq %xmm1, %xmm2
     25 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
     26 ; SSE2-NEXT:    psllq %xmm1, %xmm0
     27 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
     28 ; SSE2-NEXT:    retq
     29 ;
     30 ; SSE41-LABEL: var_shift_v2i64:
     31 ; SSE41:       # %bb.0:
     32 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
     33 ; SSE41-NEXT:    psllq %xmm1, %xmm2
     34 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
     35 ; SSE41-NEXT:    psllq %xmm1, %xmm0
     36 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
     37 ; SSE41-NEXT:    retq
     38 ;
     39 ; AVX1-LABEL: var_shift_v2i64:
     40 ; AVX1:       # %bb.0:
     41 ; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm2
     42 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
     43 ; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
     44 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
     45 ; AVX1-NEXT:    retq
     46 ;
     47 ; AVX2-LABEL: var_shift_v2i64:
     48 ; AVX2:       # %bb.0:
     49 ; AVX2-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
     50 ; AVX2-NEXT:    retq
     51 ;
     52 ; XOPAVX1-LABEL: var_shift_v2i64:
     53 ; XOPAVX1:       # %bb.0:
     54 ; XOPAVX1-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
     55 ; XOPAVX1-NEXT:    retq
     56 ;
     57 ; XOPAVX2-LABEL: var_shift_v2i64:
     58 ; XOPAVX2:       # %bb.0:
     59 ; XOPAVX2-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
     60 ; XOPAVX2-NEXT:    retq
     61 ;
     62 ; AVX512-LABEL: var_shift_v2i64:
     63 ; AVX512:       # %bb.0:
     64 ; AVX512-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
     65 ; AVX512-NEXT:    retq
     66 ;
     67 ; AVX512VL-LABEL: var_shift_v2i64:
     68 ; AVX512VL:       # %bb.0:
     69 ; AVX512VL-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
     70 ; AVX512VL-NEXT:    retq
     71 ;
     72 ; X32-SSE-LABEL: var_shift_v2i64:
     73 ; X32-SSE:       # %bb.0:
     74 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
     75 ; X32-SSE-NEXT:    psllq %xmm1, %xmm2
     76 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
     77 ; X32-SSE-NEXT:    psllq %xmm1, %xmm0
     78 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
     79 ; X32-SSE-NEXT:    retl
; Per-element variable shl of <2 x i64>. The CHECK lines above show that
; pre-AVX2 targets, which have no variable v2i64 shift, shift the vector twice
; with psllq (each psllq uses only the low 64-bit scalar count, so the amount
; vector is pshufd-rotated for the second shift) and blend the two results;
; AVX2/XOPAVX2/AVX512 select vpsllvq and XOPAVX1 selects vpshlq.
     80   %shift = shl <2 x i64> %a, %b
     81   ret <2 x i64> %shift
     82 }
     83 
     84 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
     85 ; SSE2-LABEL: var_shift_v4i32:
     86 ; SSE2:       # %bb.0:
     87 ; SSE2-NEXT:    pslld $23, %xmm1
     88 ; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
     89 ; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
     90 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
     91 ; SSE2-NEXT:    pmuludq %xmm1, %xmm0
     92 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     93 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
     94 ; SSE2-NEXT:    pmuludq %xmm2, %xmm1
     95 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
     96 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
     97 ; SSE2-NEXT:    retq
     98 ;
     99 ; SSE41-LABEL: var_shift_v4i32:
    100 ; SSE41:       # %bb.0:
    101 ; SSE41-NEXT:    pslld $23, %xmm1
    102 ; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
    103 ; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
    104 ; SSE41-NEXT:    pmulld %xmm1, %xmm0
    105 ; SSE41-NEXT:    retq
    106 ;
    107 ; AVX1-LABEL: var_shift_v4i32:
    108 ; AVX1:       # %bb.0:
    109 ; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
    110 ; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
    111 ; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
    112 ; AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
    113 ; AVX1-NEXT:    retq
    114 ;
    115 ; AVX2-LABEL: var_shift_v4i32:
    116 ; AVX2:       # %bb.0:
    117 ; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
    118 ; AVX2-NEXT:    retq
    119 ;
    120 ; XOPAVX1-LABEL: var_shift_v4i32:
    121 ; XOPAVX1:       # %bb.0:
    122 ; XOPAVX1-NEXT:    vpshld %xmm1, %xmm0, %xmm0
    123 ; XOPAVX1-NEXT:    retq
    124 ;
    125 ; XOPAVX2-LABEL: var_shift_v4i32:
    126 ; XOPAVX2:       # %bb.0:
    127 ; XOPAVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
    128 ; XOPAVX2-NEXT:    retq
    129 ;
    130 ; AVX512-LABEL: var_shift_v4i32:
    131 ; AVX512:       # %bb.0:
    132 ; AVX512-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
    133 ; AVX512-NEXT:    retq
    134 ;
    135 ; AVX512VL-LABEL: var_shift_v4i32:
    136 ; AVX512VL:       # %bb.0:
    137 ; AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
    138 ; AVX512VL-NEXT:    retq
    139 ;
    140 ; X32-SSE-LABEL: var_shift_v4i32:
    141 ; X32-SSE:       # %bb.0:
    142 ; X32-SSE-NEXT:    pslld $23, %xmm1
    143 ; X32-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm1
    144 ; X32-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
    145 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
    146 ; X32-SSE-NEXT:    pmuludq %xmm1, %xmm0
    147 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    148 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
    149 ; X32-SSE-NEXT:    pmuludq %xmm2, %xmm1
    150 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    151 ; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    152 ; X32-SSE-NEXT:    retl
; Per-element variable shl of <4 x i32>. SSE targets build 2^b per lane with
; the float-exponent trick (pslld $23 moves b into the exponent field, paddd
; adds a constant-pool bias, cvttps2dq converts back), then multiply since
; a << b == a * 2^b; SSE2 additionally emulates the 32-bit multiply with
; pmuludq+shuffles. AVX2/XOPAVX2/AVX512 use vpsllvd; XOPAVX1 uses vpshld.
    153   %shift = shl <4 x i32> %a, %b
    154   ret <4 x i32> %shift
    155 }
    156 
    157 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
    158 ; SSE2-LABEL: var_shift_v8i16:
    159 ; SSE2:       # %bb.0:
    160 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    161 ; SSE2-NEXT:    movdqa %xmm1, %xmm3
    162 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    163 ; SSE2-NEXT:    pslld $23, %xmm3
    164 ; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
    165 ; SSE2-NEXT:    paddd %xmm4, %xmm3
    166 ; SSE2-NEXT:    cvttps2dq %xmm3, %xmm3
    167 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
    168 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
    169 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
    170 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    171 ; SSE2-NEXT:    pslld $23, %xmm1
    172 ; SSE2-NEXT:    paddd %xmm4, %xmm1
    173 ; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
    174 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
    175 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
    176 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    177 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
    178 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
    179 ; SSE2-NEXT:    retq
    180 ;
    181 ; SSE41-LABEL: var_shift_v8i16:
    182 ; SSE41:       # %bb.0:
    183 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    184 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    185 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    186 ; SSE41-NEXT:    pslld $23, %xmm1
    187 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
    188 ; SSE41-NEXT:    paddd %xmm2, %xmm1
    189 ; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
    190 ; SSE41-NEXT:    pslld $23, %xmm3
    191 ; SSE41-NEXT:    paddd %xmm2, %xmm3
    192 ; SSE41-NEXT:    cvttps2dq %xmm3, %xmm2
    193 ; SSE41-NEXT:    packusdw %xmm1, %xmm2
    194 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
    195 ; SSE41-NEXT:    retq
    196 ;
    197 ; AVX1-LABEL: var_shift_v8i16:
    198 ; AVX1:       # %bb.0:
    199 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    200 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    201 ; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
    202 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
    203 ; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
    204 ; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
    205 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    206 ; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
    207 ; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
    208 ; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
    209 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
    210 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
    211 ; AVX1-NEXT:    retq
    212 ;
    213 ; AVX2-LABEL: var_shift_v8i16:
    214 ; AVX2:       # %bb.0:
    215 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
    216 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    217 ; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
    218 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
    219 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
    220 ; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
    221 ; AVX2-NEXT:    vzeroupper
    222 ; AVX2-NEXT:    retq
    223 ;
    224 ; XOP-LABEL: var_shift_v8i16:
    225 ; XOP:       # %bb.0:
    226 ; XOP-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
    227 ; XOP-NEXT:    retq
    228 ;
    229 ; AVX512DQ-LABEL: var_shift_v8i16:
    230 ; AVX512DQ:       # %bb.0:
    231 ; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
    232 ; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    233 ; AVX512DQ-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
    234 ; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
    235 ; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
    236 ; AVX512DQ-NEXT:    vzeroupper
    237 ; AVX512DQ-NEXT:    retq
    238 ;
    239 ; AVX512BW-LABEL: var_shift_v8i16:
    240 ; AVX512BW:       # %bb.0:
    241 ; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
    242 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
    243 ; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
    244 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    245 ; AVX512BW-NEXT:    vzeroupper
    246 ; AVX512BW-NEXT:    retq
    247 ;
    248 ; AVX512DQVL-LABEL: var_shift_v8i16:
    249 ; AVX512DQVL:       # %bb.0:
    250 ; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
    251 ; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    252 ; AVX512DQVL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
    253 ; AVX512DQVL-NEXT:    vpmovdw %ymm0, %xmm0
    254 ; AVX512DQVL-NEXT:    vzeroupper
    255 ; AVX512DQVL-NEXT:    retq
    256 ;
    257 ; AVX512BWVL-LABEL: var_shift_v8i16:
    258 ; AVX512BWVL:       # %bb.0:
    259 ; AVX512BWVL-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0
    260 ; AVX512BWVL-NEXT:    retq
    261 ;
    262 ; X32-SSE-LABEL: var_shift_v8i16:
    263 ; X32-SSE:       # %bb.0:
    264 ; X32-SSE-NEXT:    pxor %xmm2, %xmm2
    265 ; X32-SSE-NEXT:    movdqa %xmm1, %xmm3
    266 ; X32-SSE-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
    267 ; X32-SSE-NEXT:    pslld $23, %xmm3
    268 ; X32-SSE-NEXT:    movdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
    269 ; X32-SSE-NEXT:    paddd %xmm4, %xmm3
    270 ; X32-SSE-NEXT:    cvttps2dq %xmm3, %xmm3
    271 ; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
    272 ; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
    273 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
    274 ; X32-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    275 ; X32-SSE-NEXT:    pslld $23, %xmm1
    276 ; X32-SSE-NEXT:    paddd %xmm4, %xmm1
    277 ; X32-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
    278 ; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
    279 ; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
    280 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    281 ; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
    282 ; X32-SSE-NEXT:    pmullw %xmm1, %xmm0
    283 ; X32-SSE-NEXT:    retl
; Per-element variable shl of <8 x i16>. SSE/AVX1 zero-extend the amounts to
; i32 per 4-lane half, build 2^b with the float-exponent trick (1065353216 is
; the bit pattern of 1.0f, i.e. the FP exponent bias), repack to words, and
; pmullw. AVX2/AVX512DQ widen to vpsllvd and truncate back; AVX512BW(+VL) use
; the native word shift vpsllvw; XOP uses vpshlw.
    284   %shift = shl <8 x i16> %a, %b
    285   ret <8 x i16> %shift
    286 }
    287 
    288 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
    289 ; SSE2-LABEL: var_shift_v16i8:
    290 ; SSE2:       # %bb.0:
    291 ; SSE2-NEXT:    psllw $5, %xmm1
    292 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    293 ; SSE2-NEXT:    pxor %xmm3, %xmm3
    294 ; SSE2-NEXT:    pcmpgtb %xmm1, %xmm3
    295 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
    296 ; SSE2-NEXT:    pandn %xmm0, %xmm4
    297 ; SSE2-NEXT:    psllw $4, %xmm0
    298 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    299 ; SSE2-NEXT:    pand %xmm3, %xmm0
    300 ; SSE2-NEXT:    por %xmm4, %xmm0
    301 ; SSE2-NEXT:    paddb %xmm1, %xmm1
    302 ; SSE2-NEXT:    pxor %xmm3, %xmm3
    303 ; SSE2-NEXT:    pcmpgtb %xmm1, %xmm3
    304 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
    305 ; SSE2-NEXT:    pandn %xmm0, %xmm4
    306 ; SSE2-NEXT:    psllw $2, %xmm0
    307 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    308 ; SSE2-NEXT:    pand %xmm3, %xmm0
    309 ; SSE2-NEXT:    por %xmm4, %xmm0
    310 ; SSE2-NEXT:    paddb %xmm1, %xmm1
    311 ; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
    312 ; SSE2-NEXT:    movdqa %xmm2, %xmm1
    313 ; SSE2-NEXT:    pandn %xmm0, %xmm1
    314 ; SSE2-NEXT:    paddb %xmm0, %xmm0
    315 ; SSE2-NEXT:    pand %xmm2, %xmm0
    316 ; SSE2-NEXT:    por %xmm1, %xmm0
    317 ; SSE2-NEXT:    retq
    318 ;
    319 ; SSE41-LABEL: var_shift_v16i8:
    320 ; SSE41:       # %bb.0:
    321 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    322 ; SSE41-NEXT:    psllw $5, %xmm1
    323 ; SSE41-NEXT:    movdqa %xmm0, %xmm3
    324 ; SSE41-NEXT:    psllw $4, %xmm3
    325 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
    326 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    327 ; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
    328 ; SSE41-NEXT:    movdqa %xmm2, %xmm3
    329 ; SSE41-NEXT:    psllw $2, %xmm3
    330 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
    331 ; SSE41-NEXT:    paddb %xmm1, %xmm1
    332 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    333 ; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
    334 ; SSE41-NEXT:    movdqa %xmm2, %xmm3
    335 ; SSE41-NEXT:    paddb %xmm2, %xmm3
    336 ; SSE41-NEXT:    paddb %xmm1, %xmm1
    337 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    338 ; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
    339 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    340 ; SSE41-NEXT:    retq
    341 ;
    342 ; AVX-LABEL: var_shift_v16i8:
    343 ; AVX:       # %bb.0:
    344 ; AVX-NEXT:    vpsllw $5, %xmm1, %xmm1
    345 ; AVX-NEXT:    vpsllw $4, %xmm0, %xmm2
    346 ; AVX-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
    347 ; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
    348 ; AVX-NEXT:    vpsllw $2, %xmm0, %xmm2
    349 ; AVX-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
    350 ; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
    351 ; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
    352 ; AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm2
    353 ; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
    354 ; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
    355 ; AVX-NEXT:    retq
    356 ;
    357 ; XOP-LABEL: var_shift_v16i8:
    358 ; XOP:       # %bb.0:
    359 ; XOP-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
    360 ; XOP-NEXT:    retq
    361 ;
    362 ; AVX512DQ-LABEL: var_shift_v16i8:
    363 ; AVX512DQ:       # %bb.0:
    364 ; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    365 ; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    366 ; AVX512DQ-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
    367 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    368 ; AVX512DQ-NEXT:    vzeroupper
    369 ; AVX512DQ-NEXT:    retq
    370 ;
    371 ; AVX512BW-LABEL: var_shift_v16i8:
    372 ; AVX512BW:       # %bb.0:
    373 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    374 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    375 ; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
    376 ; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
    377 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
    378 ; AVX512BW-NEXT:    vzeroupper
    379 ; AVX512BW-NEXT:    retq
    380 ;
    381 ; AVX512DQVL-LABEL: var_shift_v16i8:
    382 ; AVX512DQVL:       # %bb.0:
    383 ; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    384 ; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    385 ; AVX512DQVL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
    386 ; AVX512DQVL-NEXT:    vpmovdb %zmm0, %xmm0
    387 ; AVX512DQVL-NEXT:    vzeroupper
    388 ; AVX512DQVL-NEXT:    retq
    389 ;
    390 ; AVX512BWVL-LABEL: var_shift_v16i8:
    391 ; AVX512BWVL:       # %bb.0:
    392 ; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    393 ; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    394 ; AVX512BWVL-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
    395 ; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
    396 ; AVX512BWVL-NEXT:    vzeroupper
    397 ; AVX512BWVL-NEXT:    retq
    398 ;
    399 ; X32-SSE-LABEL: var_shift_v16i8:
    400 ; X32-SSE:       # %bb.0:
    401 ; X32-SSE-NEXT:    psllw $5, %xmm1
    402 ; X32-SSE-NEXT:    pxor %xmm2, %xmm2
    403 ; X32-SSE-NEXT:    pxor %xmm3, %xmm3
    404 ; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
    405 ; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
    406 ; X32-SSE-NEXT:    pandn %xmm0, %xmm4
    407 ; X32-SSE-NEXT:    psllw $4, %xmm0
    408 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
    409 ; X32-SSE-NEXT:    pand %xmm3, %xmm0
    410 ; X32-SSE-NEXT:    por %xmm4, %xmm0
    411 ; X32-SSE-NEXT:    paddb %xmm1, %xmm1
    412 ; X32-SSE-NEXT:    pxor %xmm3, %xmm3
    413 ; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
    414 ; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
    415 ; X32-SSE-NEXT:    pandn %xmm0, %xmm4
    416 ; X32-SSE-NEXT:    psllw $2, %xmm0
    417 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
    418 ; X32-SSE-NEXT:    pand %xmm3, %xmm0
    419 ; X32-SSE-NEXT:    por %xmm4, %xmm0
    420 ; X32-SSE-NEXT:    paddb %xmm1, %xmm1
    421 ; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
    422 ; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
    423 ; X32-SSE-NEXT:    pandn %xmm0, %xmm1
    424 ; X32-SSE-NEXT:    paddb %xmm0, %xmm0
    425 ; X32-SSE-NEXT:    pand %xmm2, %xmm0
    426 ; X32-SSE-NEXT:    por %xmm1, %xmm0
    427 ; X32-SSE-NEXT:    retl
; Per-element variable shl of <16 x i8>: x86 has no byte shift instruction.
; SSE/AVX emit a blend ladder: psllw $5 moves the amount bits toward each
; byte's MSB, and three rounds of (shift by 4 / by 2 / by 1, masked to keep
; byte lanes clean) are selected per byte via pcmpgtb+and/or or pblendvb,
; doubling the control vector (paddb) between rounds. AVX512 widens to
; dword/word shifts and truncates back; XOP has a native vpshlb.
    428   %shift = shl <16 x i8> %a, %b
    429   ret <16 x i8> %shift
    430 }
    431 
    432 ;
    433 ; Uniform Variable Shifts
    434 ;
    435 
    436 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
    437 ; SSE-LABEL: splatvar_shift_v2i64:
    438 ; SSE:       # %bb.0:
    439 ; SSE-NEXT:    psllq %xmm1, %xmm0
    440 ; SSE-NEXT:    retq
    441 ;
    442 ; AVX-LABEL: splatvar_shift_v2i64:
    443 ; AVX:       # %bb.0:
    444 ; AVX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
    445 ; AVX-NEXT:    retq
    446 ;
    447 ; XOP-LABEL: splatvar_shift_v2i64:
    448 ; XOP:       # %bb.0:
    449 ; XOP-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
    450 ; XOP-NEXT:    retq
    451 ;
    452 ; AVX512-LABEL: splatvar_shift_v2i64:
    453 ; AVX512:       # %bb.0:
    454 ; AVX512-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
    455 ; AVX512-NEXT:    retq
    456 ;
    457 ; AVX512VL-LABEL: splatvar_shift_v2i64:
    458 ; AVX512VL:       # %bb.0:
    459 ; AVX512VL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
    460 ; AVX512VL-NEXT:    retq
    461 ;
    462 ; X32-SSE-LABEL: splatvar_shift_v2i64:
    463 ; X32-SSE:       # %bb.0:
    464 ; X32-SSE-NEXT:    psllq %xmm1, %xmm0
    465 ; X32-SSE-NEXT:    retl
; Uniform (splatted) shift amount: psllq already shifts every lane by the low
; 64-bit scalar of its count operand, so the splat shuffle folds away and all
; targets emit a single psllq/vpsllq.
    466   %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
    467   %shift = shl <2 x i64> %a, %splat
    468   ret <2 x i64> %shift
    469 }
    470 
    471 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
    472 ; SSE2-LABEL: splatvar_shift_v4i32:
    473 ; SSE2:       # %bb.0:
    474 ; SSE2-NEXT:    xorps %xmm2, %xmm2
    475 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
    476 ; SSE2-NEXT:    pslld %xmm2, %xmm0
    477 ; SSE2-NEXT:    retq
    478 ;
    479 ; SSE41-LABEL: splatvar_shift_v4i32:
    480 ; SSE41:       # %bb.0:
    481 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    482 ; SSE41-NEXT:    pslld %xmm1, %xmm0
    483 ; SSE41-NEXT:    retq
    484 ;
    485 ; AVX-LABEL: splatvar_shift_v4i32:
    486 ; AVX:       # %bb.0:
    487 ; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    488 ; AVX-NEXT:    vpslld %xmm1, %xmm0, %xmm0
    489 ; AVX-NEXT:    retq
    490 ;
    491 ; XOP-LABEL: splatvar_shift_v4i32:
    492 ; XOP:       # %bb.0:
    493 ; XOP-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    494 ; XOP-NEXT:    vpslld %xmm1, %xmm0, %xmm0
    495 ; XOP-NEXT:    retq
    496 ;
    497 ; AVX512-LABEL: splatvar_shift_v4i32:
    498 ; AVX512:       # %bb.0:
    499 ; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    500 ; AVX512-NEXT:    vpslld %xmm1, %xmm0, %xmm0
    501 ; AVX512-NEXT:    retq
    502 ;
    503 ; AVX512VL-LABEL: splatvar_shift_v4i32:
    504 ; AVX512VL:       # %bb.0:
    505 ; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    506 ; AVX512VL-NEXT:    vpslld %xmm1, %xmm0, %xmm0
    507 ; AVX512VL-NEXT:    retq
    508 ;
    509 ; X32-SSE-LABEL: splatvar_shift_v4i32:
    510 ; X32-SSE:       # %bb.0:
    511 ; X32-SSE-NEXT:    xorps %xmm2, %xmm2
    512 ; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
    513 ; X32-SSE-NEXT:    pslld %xmm2, %xmm0
    514 ; X32-SSE-NEXT:    retl
; Uniform shift amount: only element 0 of the splat matters, so the low i32
; count is zero-extended into a scalar (movss-into-zero on SSE2, pmovzxdq
; elsewhere) and the scalar-count form of pslld shifts all lanes.
    515   %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
    516   %shift = shl <4 x i32> %a, %splat
    517   ret <4 x i32> %shift
    518 }
    519 
    520 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
    521 ; SSE2-LABEL: splatvar_shift_v8i16:
    522 ; SSE2:       # %bb.0:
    523 ; SSE2-NEXT:    pextrw $0, %xmm1, %eax
    524 ; SSE2-NEXT:    movd %eax, %xmm1
    525 ; SSE2-NEXT:    psllw %xmm1, %xmm0
    526 ; SSE2-NEXT:    retq
    527 ;
    528 ; SSE41-LABEL: splatvar_shift_v8i16:
    529 ; SSE41:       # %bb.0:
    530 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    531 ; SSE41-NEXT:    psllw %xmm1, %xmm0
    532 ; SSE41-NEXT:    retq
    533 ;
    534 ; AVX-LABEL: splatvar_shift_v8i16:
    535 ; AVX:       # %bb.0:
    536 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    537 ; AVX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
    538 ; AVX-NEXT:    retq
    539 ;
    540 ; XOP-LABEL: splatvar_shift_v8i16:
    541 ; XOP:       # %bb.0:
    542 ; XOP-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    543 ; XOP-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
    544 ; XOP-NEXT:    retq
    545 ;
    546 ; AVX512-LABEL: splatvar_shift_v8i16:
    547 ; AVX512:       # %bb.0:
    548 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    549 ; AVX512-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
    550 ; AVX512-NEXT:    retq
    551 ;
    552 ; AVX512VL-LABEL: splatvar_shift_v8i16:
    553 ; AVX512VL:       # %bb.0:
    554 ; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    555 ; AVX512VL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
    556 ; AVX512VL-NEXT:    retq
    557 ;
    558 ; X32-SSE-LABEL: splatvar_shift_v8i16:
    559 ; X32-SSE:       # %bb.0:
    560 ; X32-SSE-NEXT:    pextrw $0, %xmm1, %eax
    561 ; X32-SSE-NEXT:    movd %eax, %xmm1
    562 ; X32-SSE-NEXT:    psllw %xmm1, %xmm0
    563 ; X32-SSE-NEXT:    retl
; Uniform shift amount: the low word of the splat is zero-extended to a scalar
; count (pextrw+movd on SSE2, pmovzxwq with SSE4.1+) and the scalar-count form
; of psllw shifts all eight lanes.
    564   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
    565   %shift = shl <8 x i16> %a, %splat
    566   ret <8 x i16> %shift
    567 }
    568 
    569 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
    570 ; SSE2-LABEL: splatvar_shift_v16i8:
    571 ; SSE2:       # %bb.0:
    572 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    573 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
    574 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0]
    575 ; SSE2-NEXT:    psllw $5, %xmm2
    576 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    577 ; SSE2-NEXT:    pxor %xmm3, %xmm3
    578 ; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
    579 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
    580 ; SSE2-NEXT:    pandn %xmm0, %xmm4
    581 ; SSE2-NEXT:    psllw $4, %xmm0
    582 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    583 ; SSE2-NEXT:    pand %xmm3, %xmm0
    584 ; SSE2-NEXT:    por %xmm4, %xmm0
    585 ; SSE2-NEXT:    paddb %xmm2, %xmm2
    586 ; SSE2-NEXT:    pxor %xmm3, %xmm3
    587 ; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
    588 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
    589 ; SSE2-NEXT:    pandn %xmm0, %xmm4
    590 ; SSE2-NEXT:    psllw $2, %xmm0
    591 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    592 ; SSE2-NEXT:    pand %xmm3, %xmm0
    593 ; SSE2-NEXT:    por %xmm4, %xmm0
    594 ; SSE2-NEXT:    paddb %xmm2, %xmm2
    595 ; SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
    596 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
    597 ; SSE2-NEXT:    pandn %xmm0, %xmm2
    598 ; SSE2-NEXT:    paddb %xmm0, %xmm0
    599 ; SSE2-NEXT:    pand %xmm1, %xmm0
    600 ; SSE2-NEXT:    por %xmm2, %xmm0
    601 ; SSE2-NEXT:    retq
    602 ;
    603 ; SSE41-LABEL: splatvar_shift_v16i8:
    604 ; SSE41:       # %bb.0:
    605 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
    606 ; SSE41-NEXT:    pxor %xmm0, %xmm0
    607 ; SSE41-NEXT:    pshufb %xmm0, %xmm1
    608 ; SSE41-NEXT:    psllw $5, %xmm1
    609 ; SSE41-NEXT:    movdqa %xmm1, %xmm3
    610 ; SSE41-NEXT:    paddb %xmm1, %xmm3
    611 ; SSE41-NEXT:    movdqa %xmm2, %xmm4
    612 ; SSE41-NEXT:    psllw $4, %xmm4
    613 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm4
    614 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    615 ; SSE41-NEXT:    pblendvb %xmm0, %xmm4, %xmm2
    616 ; SSE41-NEXT:    movdqa %xmm2, %xmm1
    617 ; SSE41-NEXT:    psllw $2, %xmm1
    618 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
    619 ; SSE41-NEXT:    movdqa %xmm3, %xmm0
    620 ; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
    621 ; SSE41-NEXT:    movdqa %xmm2, %xmm1
    622 ; SSE41-NEXT:    paddb %xmm2, %xmm1
    623 ; SSE41-NEXT:    paddb %xmm3, %xmm3
    624 ; SSE41-NEXT:    movdqa %xmm3, %xmm0
    625 ; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
    626 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    627 ; SSE41-NEXT:    retq
    628 ;
    629 ; AVX1-LABEL: splatvar_shift_v16i8:
    630 ; AVX1:       # %bb.0:
    631 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    632 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    633 ; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
    634 ; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm2
    635 ; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
    636 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm3, %xmm3
    637 ; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
    638 ; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm1
    639 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
    640 ; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
    641 ; AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm1
    642 ; AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm2
    643 ; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
    644 ; AVX1-NEXT:    retq
    645 ;
    646 ; AVX2-LABEL: splatvar_shift_v16i8:
    647 ; AVX2:       # %bb.0:
    648 ; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
    649 ; AVX2-NEXT:    vpsllw $5, %xmm1, %xmm1
    650 ; AVX2-NEXT:    vpsllw $4, %xmm0, %xmm2
    651 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
    652 ; AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
    653 ; AVX2-NEXT:    vpsllw $2, %xmm0, %xmm2
    654 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
    655 ; AVX2-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
    656 ; AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
    657 ; AVX2-NEXT:    vpaddb %xmm0, %xmm0, %xmm2
    658 ; AVX2-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
    659 ; AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
    660 ; AVX2-NEXT:    retq
    661 ;
    662 ; XOPAVX1-LABEL: splatvar_shift_v16i8:
    663 ; XOPAVX1:       # %bb.0:
    664 ; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    665 ; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    666 ; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
    667 ; XOPAVX1-NEXT:    retq
    668 ;
    669 ; XOPAVX2-LABEL: splatvar_shift_v16i8:
    670 ; XOPAVX2:       # %bb.0:
    671 ; XOPAVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
    672 ; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
    673 ; XOPAVX2-NEXT:    retq
    674 ;
    675 ; AVX512DQ-LABEL: splatvar_shift_v16i8:
    676 ; AVX512DQ:       # %bb.0:
    677 ; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %xmm1
    678 ; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    679 ; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    680 ; AVX512DQ-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
    681 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    682 ; AVX512DQ-NEXT:    vzeroupper
    683 ; AVX512DQ-NEXT:    retq
    684 ;
    685 ; AVX512BW-LABEL: splatvar_shift_v16i8:
    686 ; AVX512BW:       # %bb.0:
    687 ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %xmm1
    688 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    689 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    690 ; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
    691 ; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
    692 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
    693 ; AVX512BW-NEXT:    vzeroupper
    694 ; AVX512BW-NEXT:    retq
    695 ;
    696 ; AVX512DQVL-LABEL: splatvar_shift_v16i8:
    697 ; AVX512DQVL:       # %bb.0:
    698 ; AVX512DQVL-NEXT:    vpbroadcastb %xmm1, %xmm1
    699 ; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    700 ; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    701 ; AVX512DQVL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
    702 ; AVX512DQVL-NEXT:    vpmovdb %zmm0, %xmm0
    703 ; AVX512DQVL-NEXT:    vzeroupper
    704 ; AVX512DQVL-NEXT:    retq
    705 ;
    706 ; AVX512BWVL-LABEL: splatvar_shift_v16i8:
    707 ; AVX512BWVL:       # %bb.0:
    708 ; AVX512BWVL-NEXT:    vpbroadcastb %xmm1, %xmm1
    709 ; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    710 ; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    711 ; AVX512BWVL-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
    712 ; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
    713 ; AVX512BWVL-NEXT:    vzeroupper
    714 ; AVX512BWVL-NEXT:    retq
    715 ;
    716 ; X32-SSE-LABEL: splatvar_shift_v16i8:
    717 ; X32-SSE:       # %bb.0:
    718 ; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    719 ; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
    720 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0]
    721 ; X32-SSE-NEXT:    psllw $5, %xmm2
    722 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
    723 ; X32-SSE-NEXT:    pxor %xmm3, %xmm3
    724 ; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm3
    725 ; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
    726 ; X32-SSE-NEXT:    pandn %xmm0, %xmm4
    727 ; X32-SSE-NEXT:    psllw $4, %xmm0
    728 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
    729 ; X32-SSE-NEXT:    pand %xmm3, %xmm0
    730 ; X32-SSE-NEXT:    por %xmm4, %xmm0
    731 ; X32-SSE-NEXT:    paddb %xmm2, %xmm2
    732 ; X32-SSE-NEXT:    pxor %xmm3, %xmm3
    733 ; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm3
    734 ; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
    735 ; X32-SSE-NEXT:    pandn %xmm0, %xmm4
    736 ; X32-SSE-NEXT:    psllw $2, %xmm0
    737 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
    738 ; X32-SSE-NEXT:    pand %xmm3, %xmm0
    739 ; X32-SSE-NEXT:    por %xmm4, %xmm0
    740 ; X32-SSE-NEXT:    paddb %xmm2, %xmm2
    741 ; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm1
    742 ; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
    743 ; X32-SSE-NEXT:    pandn %xmm0, %xmm2
    744 ; X32-SSE-NEXT:    paddb %xmm0, %xmm0
    745 ; X32-SSE-NEXT:    pand %xmm1, %xmm0
    746 ; X32-SSE-NEXT:    por %xmm2, %xmm0
    747 ; X32-SSE-NEXT:    retl
    748   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
    749   %shift = shl <16 x i8> %a, %splat
    750   ret <16 x i8> %shift
    751 }
    752 
    753 ;
    754 ; Constant Shifts
    755 ;
    756 
    757 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
        ; Per-lane constant shift: lane 0 << 1, lane 1 << 7. Targets without a
        ; variable 64-bit shift (SSE2/SSE41/AVX1/X32-SSE) emit two scalar psllq's
        ; and blend the results; AVX2/XOPAVX2/AVX512 use a single vpsllvq and
        ; XOPAVX1 uses vpshlq.
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
    758 ; SSE2-LABEL: constant_shift_v2i64:
    759 ; SSE2:       # %bb.0:
    760 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    761 ; SSE2-NEXT:    psllq $1, %xmm1
    762 ; SSE2-NEXT:    psllq $7, %xmm0
    763 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    764 ; SSE2-NEXT:    retq
    765 ;
    766 ; SSE41-LABEL: constant_shift_v2i64:
    767 ; SSE41:       # %bb.0:
    768 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    769 ; SSE41-NEXT:    psllq $7, %xmm1
    770 ; SSE41-NEXT:    psllq $1, %xmm0
    771 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    772 ; SSE41-NEXT:    retq
    773 ;
    774 ; AVX1-LABEL: constant_shift_v2i64:
    775 ; AVX1:       # %bb.0:
    776 ; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
    777 ; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
    778 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    779 ; AVX1-NEXT:    retq
    780 ;
    781 ; AVX2-LABEL: constant_shift_v2i64:
    782 ; AVX2:       # %bb.0:
    783 ; AVX2-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
    784 ; AVX2-NEXT:    retq
    785 ;
    786 ; XOPAVX1-LABEL: constant_shift_v2i64:
    787 ; XOPAVX1:       # %bb.0:
    788 ; XOPAVX1-NEXT:    vpshlq {{.*}}(%rip), %xmm0, %xmm0
    789 ; XOPAVX1-NEXT:    retq
    790 ;
    791 ; XOPAVX2-LABEL: constant_shift_v2i64:
    792 ; XOPAVX2:       # %bb.0:
    793 ; XOPAVX2-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
    794 ; XOPAVX2-NEXT:    retq
    795 ;
    796 ; AVX512-LABEL: constant_shift_v2i64:
    797 ; AVX512:       # %bb.0:
    798 ; AVX512-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
    799 ; AVX512-NEXT:    retq
    800 ;
    801 ; AVX512VL-LABEL: constant_shift_v2i64:
    802 ; AVX512VL:       # %bb.0:
    803 ; AVX512VL-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
    804 ; AVX512VL-NEXT:    retq
    805 ;
    806 ; X32-SSE-LABEL: constant_shift_v2i64:
    807 ; X32-SSE:       # %bb.0:
    808 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
    809 ; X32-SSE-NEXT:    psllq $1, %xmm1
    810 ; X32-SSE-NEXT:    psllq $7, %xmm0
    811 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    812 ; X32-SSE-NEXT:    retl
    813   %shift = shl <2 x i64> %a, <i64 1, i64 7>
    814   ret <2 x i64> %shift
    815 }
    816 
    817 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
        ; Per-lane constant shift by <4,5,6,7>. Pre-AVX2 targets lower the
        ; constant shift to a multiply by [16,32,64,128] (pmuludq on SSE2,
        ; pmulld on SSE41/AVX1); AVX2/AVX512 use vpsllvd and XOPAVX1 uses vpshld.
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
    818 ; SSE2-LABEL: constant_shift_v4i32:
    819 ; SSE2:       # %bb.0:
    820 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,64,128]
    821 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
    822 ; SSE2-NEXT:    pmuludq %xmm1, %xmm0
    823 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    824 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
    825 ; SSE2-NEXT:    pmuludq %xmm2, %xmm1
    826 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    827 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    828 ; SSE2-NEXT:    retq
    829 ;
    830 ; SSE41-LABEL: constant_shift_v4i32:
    831 ; SSE41:       # %bb.0:
    832 ; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
    833 ; SSE41-NEXT:    retq
    834 ;
    835 ; AVX1-LABEL: constant_shift_v4i32:
    836 ; AVX1:       # %bb.0:
    837 ; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
    838 ; AVX1-NEXT:    retq
    839 ;
    840 ; AVX2-LABEL: constant_shift_v4i32:
    841 ; AVX2:       # %bb.0:
    842 ; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
    843 ; AVX2-NEXT:    retq
    844 ;
    845 ; XOPAVX1-LABEL: constant_shift_v4i32:
    846 ; XOPAVX1:       # %bb.0:
    847 ; XOPAVX1-NEXT:    vpshld {{.*}}(%rip), %xmm0, %xmm0
    848 ; XOPAVX1-NEXT:    retq
    849 ;
    850 ; XOPAVX2-LABEL: constant_shift_v4i32:
    851 ; XOPAVX2:       # %bb.0:
    852 ; XOPAVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
    853 ; XOPAVX2-NEXT:    retq
    854 ;
    855 ; AVX512-LABEL: constant_shift_v4i32:
    856 ; AVX512:       # %bb.0:
    857 ; AVX512-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
    858 ; AVX512-NEXT:    retq
    859 ;
    860 ; AVX512VL-LABEL: constant_shift_v4i32:
    861 ; AVX512VL:       # %bb.0:
    862 ; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
    863 ; AVX512VL-NEXT:    retq
    864 ;
    865 ; X32-SSE-LABEL: constant_shift_v4i32:
    866 ; X32-SSE:       # %bb.0:
    867 ; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,64,128]
    868 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
    869 ; X32-SSE-NEXT:    pmuludq %xmm1, %xmm0
    870 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    871 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
    872 ; X32-SSE-NEXT:    pmuludq %xmm2, %xmm1
    873 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    874 ; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    875 ; X32-SSE-NEXT:    retl
    876   %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
    877   ret <4 x i32> %shift
    878 }
    879 
    880 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
        ; Per-lane constant shift by <0..7>. Most targets lower this to a pmullw
        ; by a constant power-of-two vector; XOP uses its native vpshlw, and
        ; AVX512BW/AVX512BWVL use the native 16-bit variable shift vpsllvw.
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
    881 ; SSE-LABEL: constant_shift_v8i16:
    882 ; SSE:       # %bb.0:
    883 ; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm0
    884 ; SSE-NEXT:    retq
    885 ;
    886 ; AVX-LABEL: constant_shift_v8i16:
    887 ; AVX:       # %bb.0:
    888 ; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
    889 ; AVX-NEXT:    retq
    890 ;
    891 ; XOP-LABEL: constant_shift_v8i16:
    892 ; XOP:       # %bb.0:
    893 ; XOP-NEXT:    vpshlw {{.*}}(%rip), %xmm0, %xmm0
    894 ; XOP-NEXT:    retq
    895 ;
    896 ; AVX512DQ-LABEL: constant_shift_v8i16:
    897 ; AVX512DQ:       # %bb.0:
    898 ; AVX512DQ-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
    899 ; AVX512DQ-NEXT:    retq
    900 ;
    901 ; AVX512BW-LABEL: constant_shift_v8i16:
    902 ; AVX512BW:       # %bb.0:
    903 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
    904 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
    905 ; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
    906 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    907 ; AVX512BW-NEXT:    vzeroupper
    908 ; AVX512BW-NEXT:    retq
    909 ;
    910 ; AVX512DQVL-LABEL: constant_shift_v8i16:
    911 ; AVX512DQVL:       # %bb.0:
    912 ; AVX512DQVL-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
    913 ; AVX512DQVL-NEXT:    retq
    914 ;
    915 ; AVX512BWVL-LABEL: constant_shift_v8i16:
    916 ; AVX512BWVL:       # %bb.0:
    917 ; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %xmm0, %xmm0
    918 ; AVX512BWVL-NEXT:    retq
    919 ;
    920 ; X32-SSE-LABEL: constant_shift_v8i16:
    921 ; X32-SSE:       # %bb.0:
    922 ; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
    923 ; X32-SSE-NEXT:    retl
    924   %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
    925   ret <8 x i16> %shift
    926 }
    927 
    928 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
        ; Per-byte constant shift by <0..7,7..0>. SSE/AVX have no byte-granular
        ; shift, so the lanes are widened (to i16 or i32), multiplied/shifted by
        ; [1,2,4,...] constants, masked, and packed back down; XOP has a native
        ; per-byte shift (vpshlb).
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
    929 ; SSE2-LABEL: constant_shift_v16i8:
    930 ; SSE2:       # %bb.0:
    931 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1]
    932 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
    933 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
    934 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
    935 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
    936 ; SSE2-NEXT:    pmullw %xmm2, %xmm3
    937 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
    938 ; SSE2-NEXT:    pand %xmm2, %xmm3
    939 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    940 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    941 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
    942 ; SSE2-NEXT:    pand %xmm2, %xmm0
    943 ; SSE2-NEXT:    packuswb %xmm3, %xmm0
    944 ; SSE2-NEXT:    retq
    945 ;
    946 ; SSE41-LABEL: constant_shift_v16i8:
    947 ; SSE41:       # %bb.0:
    948 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1]
    949 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    950 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    951 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    952 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
    953 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
    954 ; SSE41-NEXT:    pand %xmm2, %xmm0
    955 ; SSE41-NEXT:    pmullw {{.*}}(%rip), %xmm1
    956 ; SSE41-NEXT:    pand %xmm2, %xmm1
    957 ; SSE41-NEXT:    packuswb %xmm0, %xmm1
    958 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    959 ; SSE41-NEXT:    retq
    960 ;
    961 ; AVX1-LABEL: constant_shift_v16i8:
    962 ; AVX1:       # %bb.0:
    963 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1]
    964 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    965 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    966 ; AVX1-NEXT:    vpmullw %xmm1, %xmm2, %xmm1
    967 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
    968 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
    969 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    970 ; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
    971 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
    972 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
    973 ; AVX1-NEXT:    retq
    974 ;
    975 ; AVX2-LABEL: constant_shift_v16i8:
    976 ; AVX2:       # %bb.0:
    977 ; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
    978 ; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
    979 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    980 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
    981 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    982 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    983 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    984 ; AVX2-NEXT:    vzeroupper
    985 ; AVX2-NEXT:    retq
    986 ;
    987 ; XOP-LABEL: constant_shift_v16i8:
    988 ; XOP:       # %bb.0:
    989 ; XOP-NEXT:    vpshlb {{.*}}(%rip), %xmm0, %xmm0
    990 ; XOP-NEXT:    retq
    991 ;
    992 ; AVX512DQ-LABEL: constant_shift_v16i8:
    993 ; AVX512DQ:       # %bb.0:
    994 ; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    995 ; AVX512DQ-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
    996 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    997 ; AVX512DQ-NEXT:    vzeroupper
    998 ; AVX512DQ-NEXT:    retq
    999 ;
   1000 ; AVX512BW-LABEL: constant_shift_v16i8:
   1001 ; AVX512BW:       # %bb.0:
   1002 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
   1003 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1004 ; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
   1005 ; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
   1006 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
   1007 ; AVX512BW-NEXT:    vzeroupper
   1008 ; AVX512BW-NEXT:    retq
   1009 ;
   1010 ; AVX512DQVL-LABEL: constant_shift_v16i8:
   1011 ; AVX512DQVL:       # %bb.0:
   1012 ; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   1013 ; AVX512DQVL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
   1014 ; AVX512DQVL-NEXT:    vpmovdb %zmm0, %xmm0
   1015 ; AVX512DQVL-NEXT:    vzeroupper
   1016 ; AVX512DQVL-NEXT:    retq
   1017 ;
   1018 ; AVX512BWVL-LABEL: constant_shift_v16i8:
   1019 ; AVX512BWVL:       # %bb.0:
   1020 ; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1021 ; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %ymm0, %ymm0
   1022 ; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
   1023 ; AVX512BWVL-NEXT:    vzeroupper
   1024 ; AVX512BWVL-NEXT:    retq
   1025 ;
   1026 ; X32-SSE-LABEL: constant_shift_v16i8:
   1027 ; X32-SSE:       # %bb.0:
   1028 ; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1]
   1029 ; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
   1030 ; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
   1031 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
   1032 ; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
   1033 ; X32-SSE-NEXT:    pmullw %xmm2, %xmm3
   1034 ; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
   1035 ; X32-SSE-NEXT:    pand %xmm2, %xmm3
   1036 ; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
   1037 ; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
   1038 ; X32-SSE-NEXT:    pmullw %xmm1, %xmm0
   1039 ; X32-SSE-NEXT:    pand %xmm2, %xmm0
   1040 ; X32-SSE-NEXT:    packuswb %xmm3, %xmm0
   1041 ; X32-SSE-NEXT:    retl
   1042   %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   1043   ret <16 x i8> %shift
   1044 }
   1045 
   1046 ;
   1047 ; Uniform Constant Shifts
   1048 ;
   1049 
   1050 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
        ; Uniform (splat) constant shift by 7: every target emits a single
        ; immediate-form psllq/vpsllq.
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
   1051 ; SSE-LABEL: splatconstant_shift_v2i64:
   1052 ; SSE:       # %bb.0:
   1053 ; SSE-NEXT:    psllq $7, %xmm0
   1054 ; SSE-NEXT:    retq
   1055 ;
   1056 ; AVX-LABEL: splatconstant_shift_v2i64:
   1057 ; AVX:       # %bb.0:
   1058 ; AVX-NEXT:    vpsllq $7, %xmm0, %xmm0
   1059 ; AVX-NEXT:    retq
   1060 ;
   1061 ; XOP-LABEL: splatconstant_shift_v2i64:
   1062 ; XOP:       # %bb.0:
   1063 ; XOP-NEXT:    vpsllq $7, %xmm0, %xmm0
   1064 ; XOP-NEXT:    retq
   1065 ;
   1066 ; AVX512-LABEL: splatconstant_shift_v2i64:
   1067 ; AVX512:       # %bb.0:
   1068 ; AVX512-NEXT:    vpsllq $7, %xmm0, %xmm0
   1069 ; AVX512-NEXT:    retq
   1070 ;
   1071 ; AVX512VL-LABEL: splatconstant_shift_v2i64:
   1072 ; AVX512VL:       # %bb.0:
   1073 ; AVX512VL-NEXT:    vpsllq $7, %xmm0, %xmm0
   1074 ; AVX512VL-NEXT:    retq
   1075 ;
   1076 ; X32-SSE-LABEL: splatconstant_shift_v2i64:
   1077 ; X32-SSE:       # %bb.0:
   1078 ; X32-SSE-NEXT:    psllq $7, %xmm0
   1079 ; X32-SSE-NEXT:    retl
   1080   %shift = shl <2 x i64> %a, <i64 7, i64 7>
   1081   ret <2 x i64> %shift
   1082 }
   1083 
   1084 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
        ; Uniform (splat) constant shift by 5: every target emits a single
        ; immediate-form pslld/vpslld.
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
   1085 ; SSE-LABEL: splatconstant_shift_v4i32:
   1086 ; SSE:       # %bb.0:
   1087 ; SSE-NEXT:    pslld $5, %xmm0
   1088 ; SSE-NEXT:    retq
   1089 ;
   1090 ; AVX-LABEL: splatconstant_shift_v4i32:
   1091 ; AVX:       # %bb.0:
   1092 ; AVX-NEXT:    vpslld $5, %xmm0, %xmm0
   1093 ; AVX-NEXT:    retq
   1094 ;
   1095 ; XOP-LABEL: splatconstant_shift_v4i32:
   1096 ; XOP:       # %bb.0:
   1097 ; XOP-NEXT:    vpslld $5, %xmm0, %xmm0
   1098 ; XOP-NEXT:    retq
   1099 ;
   1100 ; AVX512-LABEL: splatconstant_shift_v4i32:
   1101 ; AVX512:       # %bb.0:
   1102 ; AVX512-NEXT:    vpslld $5, %xmm0, %xmm0
   1103 ; AVX512-NEXT:    retq
   1104 ;
   1105 ; AVX512VL-LABEL: splatconstant_shift_v4i32:
   1106 ; AVX512VL:       # %bb.0:
   1107 ; AVX512VL-NEXT:    vpslld $5, %xmm0, %xmm0
   1108 ; AVX512VL-NEXT:    retq
   1109 ;
   1110 ; X32-SSE-LABEL: splatconstant_shift_v4i32:
   1111 ; X32-SSE:       # %bb.0:
   1112 ; X32-SSE-NEXT:    pslld $5, %xmm0
   1113 ; X32-SSE-NEXT:    retl
   1114   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   1115   ret <4 x i32> %shift
   1116 }
   1117 
   1118 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
        ; Uniform (splat) constant shift by 3: every target emits a single
        ; immediate-form psllw/vpsllw.
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
   1119 ; SSE-LABEL: splatconstant_shift_v8i16:
   1120 ; SSE:       # %bb.0:
   1121 ; SSE-NEXT:    psllw $3, %xmm0
   1122 ; SSE-NEXT:    retq
   1123 ;
   1124 ; AVX-LABEL: splatconstant_shift_v8i16:
   1125 ; AVX:       # %bb.0:
   1126 ; AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
   1127 ; AVX-NEXT:    retq
   1128 ;
   1129 ; XOP-LABEL: splatconstant_shift_v8i16:
   1130 ; XOP:       # %bb.0:
   1131 ; XOP-NEXT:    vpsllw $3, %xmm0, %xmm0
   1132 ; XOP-NEXT:    retq
   1133 ;
   1134 ; AVX512-LABEL: splatconstant_shift_v8i16:
   1135 ; AVX512:       # %bb.0:
   1136 ; AVX512-NEXT:    vpsllw $3, %xmm0, %xmm0
   1137 ; AVX512-NEXT:    retq
   1138 ;
   1139 ; AVX512VL-LABEL: splatconstant_shift_v8i16:
   1140 ; AVX512VL:       # %bb.0:
   1141 ; AVX512VL-NEXT:    vpsllw $3, %xmm0, %xmm0
   1142 ; AVX512VL-NEXT:    retq
   1143 ;
   1144 ; X32-SSE-LABEL: splatconstant_shift_v8i16:
   1145 ; X32-SSE:       # %bb.0:
   1146 ; X32-SSE-NEXT:    psllw $3, %xmm0
   1147 ; X32-SSE-NEXT:    retl
   1148   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   1149   ret <8 x i16> %shift
   1150 }
   1151 
   1152 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
        ; Uniform (splat) byte shift by 3. With no per-byte shift instruction,
        ; this is emitted as a word shift (psllw $3) plus a pand mask that clears
        ; the bits shifted across byte boundaries; XOP uses its native vpshlb.
        ; NOTE: the CHECK blocks below are autogenerated llc expectations
        ; (utils/update_llc_test_checks.py); regenerate rather than hand-editing.
   1153 ; SSE-LABEL: splatconstant_shift_v16i8:
   1154 ; SSE:       # %bb.0:
   1155 ; SSE-NEXT:    psllw $3, %xmm0
   1156 ; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
   1157 ; SSE-NEXT:    retq
   1158 ;
   1159 ; AVX-LABEL: splatconstant_shift_v16i8:
   1160 ; AVX:       # %bb.0:
   1161 ; AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
   1162 ; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
   1163 ; AVX-NEXT:    retq
   1164 ;
   1165 ; XOP-LABEL: splatconstant_shift_v16i8:
   1166 ; XOP:       # %bb.0:
   1167 ; XOP-NEXT:    vpshlb {{.*}}(%rip), %xmm0, %xmm0
   1168 ; XOP-NEXT:    retq
   1169 ;
   1170 ; AVX512-LABEL: splatconstant_shift_v16i8:
   1171 ; AVX512:       # %bb.0:
   1172 ; AVX512-NEXT:    vpsllw $3, %xmm0, %xmm0
   1173 ; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
   1174 ; AVX512-NEXT:    retq
   1175 ;
   1176 ; AVX512VL-LABEL: splatconstant_shift_v16i8:
   1177 ; AVX512VL:       # %bb.0:
   1178 ; AVX512VL-NEXT:    vpsllw $3, %xmm0, %xmm0
   1179 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
   1180 ; AVX512VL-NEXT:    retq
   1181 ;
   1182 ; X32-SSE-LABEL: splatconstant_shift_v16i8:
   1183 ; X32-SSE:       # %bb.0:
   1184 ; X32-SSE-NEXT:    psllw $3, %xmm0
   1185 ; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
   1186 ; X32-SSE-NEXT:    retl
   1187   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   1188   ret <16 x i8> %shift
   1189 }
   1190