; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;
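; Each element is shifted right logically by a per-element amount. i64 and i32
; elements use the native vpsrlvq/vpsrlvd on both targets; i16 needs the
; AVX512BW vpsrlvw (the AVX512DQ run widens to i32 across two 256-bit halves),
; and i8 has no hardware shift, so it is emulated with vpsrlw + mask + blend.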

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;
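; The shift amount is a splat of the low element of %b, so these lower to the
; shift-by-scalar forms that take the count from an xmm register; v64i8 still
; needs the byte-wise shift + mask + blend emulation.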

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;
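; Per-element constant shift amounts come from the constant pool: i64/i32 use
; vpsrlvq/vpsrlvd with a memory operand, i16 uses vpsrlvw on AVX512BW (and is
; widened to i32 on AVX512DQ), and i8 reuses the shift + blend emulation with
; precomputed blend masks.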

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;
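; Every element is shifted by the same immediate, so these fold to the
; immediate shift forms (vpsrlq/vpsrld/vpsrlw); v64i8 is lowered as a 16-bit
; shift followed by an AND that clears the bits shifted in from the
; neighbouring byte.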

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}
    347