; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
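;
; Checks lowering of 512-bit vector left shifts (shl of <8 x i64>, <16 x i32>,
; <32 x i16>, <64 x i8>) by variable, splatted-variable, constant, and
; splatted-constant amounts, comparing AVX512DQ and AVX512BW codegen.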

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
; AVX512DQ-NEXT:    vpsllvd %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
; AVX512DQ-NEXT:    vpsllvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
; AVX512DQ-NEXT:    vpsllvd %ymm2, %ymm5, %ymm2
; AVX512DQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
; AVX512DQ-NEXT:    vpsllvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovd %xmm2, %eax
; AVX512DQ-NEXT:    movzwl %ax, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm2
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vmovd %xmm1, %eax
; AVX512BW-NEXT:    movzwl %ax, %eax
; AVX512BW-NEXT:    vmovd %eax, %xmm1
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq

  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    vpsllw $5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}