; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;
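; Per-element logical right shifts. v8i64 and v16i32 lower directly to
; vpsrlvq/vpsrlvd. v32i16 uses vpsrlvw with AVX512BW; with only AVX512DQ each
; 256-bit half is zero-extended to 32-bit lanes, shifted with vpsrlvd, and
; repacked with vpackusdw. v64i8 has no native variable shift and falls back
; to a shift-and-blend sequence per 256-bit half.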

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm5, %ymm2
; AVX512DQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

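; Without a native variable byte shift, the AVX512DQ lowering moves the shift
; amount bits into vpblendvb's sign-bit position with vpsllw $5, then
; conditionally applies shifts by 4, 2 and 1 to each 256-bit half.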
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq

  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;
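; Splatted (uniform but non-constant) shift amounts use the scalar-count shift
; forms vpsrlq/vpsrld/vpsrlw, which shift every lane by the amount held in the
; low element of an xmm register.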

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

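; The i16 splat amount is zero-extended through a GPR (vmovd/movzwl/vmovd) so
; vpsrlw sees a valid count in the low 64 bits of the xmm register.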
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovd %xmm2, %eax
; AVX512DQ-NEXT:    movzwl %ax, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm2
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vmovd %xmm1, %eax
; AVX512BW-NEXT:    movzwl %ax, %eax
; AVX512BW-NEXT:    vmovd %eax, %xmm1
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;
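; Per-element constant shift amounts are materialized from the constant pool
; ({{.*}}(%rip)) and fed to the same variable-shift lowerings as above.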

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm4
; AVX512DQ-NEXT:    vpsrld $16, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    vpsllw $5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;
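; Uniform constant amounts use the immediate shift forms. v64i8 has no byte
; shift instruction, so a word shift is followed by a mask (vpand with
; AVX512DQ, vpandq with AVX512BW) that clears the bits shifted in from the
; neighboring byte.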

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}