; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

      9 define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
     10 ; ALL-LABEL: var_shift_v8i64:
     11 ; ALL:       # %bb.0:
     12 ; ALL-NEXT:    vpsravq %zmm1, %zmm0, %zmm0
     13 ; ALL-NEXT:    retq
     14   %shift = ashr <8 x i64> %a, %b
     15   ret <8 x i64> %shift
     16 }
     17 
     18 define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
     19 ; ALL-LABEL: var_shift_v16i32:
     20 ; ALL:       # %bb.0:
     21 ; ALL-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
     22 ; ALL-NEXT:    retq
     23   %shift = ashr <16 x i32> %a, %b
     24   ret <16 x i32> %shift
     25 }
     26 
     27 define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
     28 ; AVX512DQ-LABEL: var_shift_v32i16:
     29 ; AVX512DQ:       # %bb.0:
     30 ; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
     31 ; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
     32 ; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm0, %zmm0
     33 ; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
     34 ; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
     35 ; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm1
     36 ; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm1, %zmm1
     37 ; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
     38 ; AVX512DQ-NEXT:    retq
     39 ;
     40 ; AVX512BW-LABEL: var_shift_v32i16:
     41 ; AVX512BW:       # %bb.0:
     42 ; AVX512BW-NEXT:    vpsravw %zmm1, %zmm0, %zmm0
     43 ; AVX512BW-NEXT:    retq
     44   %shift = ashr <32 x i16> %a, %b
     45   ret <32 x i16> %shift
     46 }
     47 
     48 define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
     49 ; AVX512DQ-LABEL: var_shift_v64i8:
     50 ; AVX512DQ:       # %bb.0:
     51 ; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
     52 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
     53 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
     54 ; AVX512DQ-NEXT:    vpsraw $4, %ymm5, %ymm6
     55 ; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
     56 ; AVX512DQ-NEXT:    vpsraw $2, %ymm5, %ymm6
     57 ; AVX512DQ-NEXT:    vpaddw %ymm4, %ymm4, %ymm4
     58 ; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
     59 ; AVX512DQ-NEXT:    vpsraw $1, %ymm5, %ymm6
     60 ; AVX512DQ-NEXT:    vpaddw %ymm4, %ymm4, %ymm4
     61 ; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm6, %ymm5, %ymm4
     62 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm4, %ymm4
     63 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
     64 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
     65 ; AVX512DQ-NEXT:    vpsraw $4, %ymm0, %ymm5
     66 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
     67 ; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
     68 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
     69 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
     70 ; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
     71 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
     72 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
     73 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
     74 ; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
     75 ; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm2
     76 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
     77 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
     78 ; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
     79 ; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
     80 ; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
     81 ; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
     82 ; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
     83 ; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
     84 ; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
     85 ; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
     86 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
     87 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
     88 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
     89 ; AVX512DQ-NEXT:    vpsraw $4, %ymm1, %ymm4
     90 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
     91 ; AVX512DQ-NEXT:    vpsraw $2, %ymm1, %ymm4
     92 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
     93 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
     94 ; AVX512DQ-NEXT:    vpsraw $1, %ymm1, %ymm4
     95 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
     96 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
     97 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
     98 ; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
     99 ; AVX512DQ-NEXT:    retq
    100 ;
    101 ; AVX512BW-LABEL: var_shift_v64i8:
    102 ; AVX512BW:       # %bb.0:
    103 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
    104 ; AVX512BW-NEXT:    vpsraw $4, %zmm2, %zmm3
    105 ; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
    106 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    107 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    108 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    109 ; AVX512BW-NEXT:    vpsraw $2, %zmm2, %zmm3
    110 ; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
    111 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    112 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    113 ; AVX512BW-NEXT:    vpsraw $1, %zmm2, %zmm3
    114 ; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
    115 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    116 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    117 ; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
    118 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
    119 ; AVX512BW-NEXT:    vpsraw $4, %zmm0, %zmm3
    120 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    121 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    122 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    123 ; AVX512BW-NEXT:    vpsraw $2, %zmm0, %zmm3
    124 ; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
    125 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    126 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    127 ; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm3
    128 ; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
    129 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    130 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    131 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
    132 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
    133 ; AVX512BW-NEXT:    retq
    134   %shift = ashr <64 x i8> %a, %b
    135   ret <64 x i8> %shift
    136 }
    137 
;
; Uniform Variable Shifts
;

    142 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
    143 ; ALL-LABEL: splatvar_shift_v8i64:
    144 ; ALL:       # %bb.0:
    145 ; ALL-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
    146 ; ALL-NEXT:    retq
    147   %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
    148   %shift = ashr <8 x i64> %a, %splat
    149   ret <8 x i64> %shift
    150 }
    151 
    152 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
    153 ; ALL-LABEL: splatvar_shift_v16i32:
    154 ; ALL:       # %bb.0:
    155 ; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    156 ; ALL-NEXT:    vpsrad %xmm1, %zmm0, %zmm0
    157 ; ALL-NEXT:    retq
    158   %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
    159   %shift = ashr <16 x i32> %a, %splat
    160   ret <16 x i32> %shift
    161 }
    162 
    163 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
    164 ; AVX512DQ-LABEL: splatvar_shift_v32i16:
    165 ; AVX512DQ:       # %bb.0:
    166 ; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
    167 ; AVX512DQ-NEXT:    vpsraw %xmm2, %ymm0, %ymm0
    168 ; AVX512DQ-NEXT:    vpsraw %xmm2, %ymm1, %ymm1
    169 ; AVX512DQ-NEXT:    retq
    170 ;
    171 ; AVX512BW-LABEL: splatvar_shift_v32i16:
    172 ; AVX512BW:       # %bb.0:
    173 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    174 ; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
    175 ; AVX512BW-NEXT:    retq
    176   %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
    177   %shift = ashr <32 x i16> %a, %splat
    178   ret <32 x i16> %shift
    179 }
    180 
    181 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
    182 ; AVX512DQ-LABEL: splatvar_shift_v64i8:
    183 ; AVX512DQ:       # %bb.0:
    184 ; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
    185 ; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
    186 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
    187 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
    188 ; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
    189 ; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
    190 ; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
    191 ; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm6
    192 ; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
    193 ; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
    194 ; AVX512DQ-NEXT:    vpaddw %ymm6, %ymm6, %ymm7
    195 ; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm5, %ymm4, %ymm4
    196 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm4, %ymm4
    197 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
    198 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
    199 ; AVX512DQ-NEXT:    vpsraw $4, %ymm0, %ymm5
    200 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
    201 ; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
    202 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm8
    203 ; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
    204 ; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
    205 ; AVX512DQ-NEXT:    vpaddw %ymm8, %ymm8, %ymm9
    206 ; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
    207 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
    208 ; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
    209 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
    210 ; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
    211 ; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
    212 ; AVX512DQ-NEXT:    vpsraw $2, %ymm3, %ymm4
    213 ; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
    214 ; AVX512DQ-NEXT:    vpsraw $1, %ymm3, %ymm4
    215 ; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
    216 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
    217 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
    218 ; AVX512DQ-NEXT:    vpsraw $4, %ymm1, %ymm4
    219 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
    220 ; AVX512DQ-NEXT:    vpsraw $2, %ymm1, %ymm2
    221 ; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
    222 ; AVX512DQ-NEXT:    vpsraw $1, %ymm1, %ymm2
    223 ; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
    224 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    225 ; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
    226 ; AVX512DQ-NEXT:    retq
    227 ;
    228 ; AVX512BW-LABEL: splatvar_shift_v64i8:
    229 ; AVX512BW:       # %bb.0:
    230 ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
    231 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
    232 ; AVX512BW-NEXT:    vpsraw $4, %zmm2, %zmm3
    233 ; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
    234 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    235 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    236 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    237 ; AVX512BW-NEXT:    vpsraw $2, %zmm2, %zmm3
    238 ; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
    239 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    240 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    241 ; AVX512BW-NEXT:    vpsraw $1, %zmm2, %zmm3
    242 ; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
    243 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    244 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    245 ; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
    246 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
    247 ; AVX512BW-NEXT:    vpsraw $4, %zmm0, %zmm3
    248 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    249 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    250 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    251 ; AVX512BW-NEXT:    vpsraw $2, %zmm0, %zmm3
    252 ; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
    253 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    254 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    255 ; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm3
    256 ; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
    257 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    258 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    259 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
    260 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
    261 ; AVX512BW-NEXT:    retq
    262   %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
    263   %shift = ashr <64 x i8> %a, %splat
    264   ret <64 x i8> %shift
    265 }
    266 
;
; Constant Shifts
;

    271 define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
    272 ; ALL-LABEL: constant_shift_v8i64:
    273 ; ALL:       # %bb.0:
    274 ; ALL-NEXT:    vpsravq {{.*}}(%rip), %zmm0, %zmm0
    275 ; ALL-NEXT:    retq
    276   %shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
    277   ret <8 x i64> %shift
    278 }
    279 
    280 define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
    281 ; ALL-LABEL: constant_shift_v16i32:
    282 ; ALL:       # %bb.0:
    283 ; ALL-NEXT:    vpsravd {{.*}}(%rip), %zmm0, %zmm0
    284 ; ALL-NEXT:    retq
    285   %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
    286   ret <16 x i32> %shift
    287 }
    288 
    289 define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
    290 ; AVX512DQ-LABEL: constant_shift_v32i16:
    291 ; AVX512DQ:       # %bb.0:
    292 ; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
    293 ; AVX512DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
    294 ; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm0, %zmm0
    295 ; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
    296 ; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm1
    297 ; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm1, %zmm1
    298 ; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
    299 ; AVX512DQ-NEXT:    retq
    300 ;
    301 ; AVX512BW-LABEL: constant_shift_v32i16:
    302 ; AVX512BW:       # %bb.0:
    303 ; AVX512BW-NEXT:    vpsravw {{.*}}(%rip), %zmm0, %zmm0
    304 ; AVX512BW-NEXT:    retq
    305   %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
    306   ret <32 x i16> %shift
    307 }
    308 
    309 define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
    310 ; AVX512DQ-LABEL: constant_shift_v64i8:
    311 ; AVX512DQ:       # %bb.0:
    312 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
    313 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
    314 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
    315 ; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
    316 ; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
    317 ; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
    318 ; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm6
    319 ; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
    320 ; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
    321 ; AVX512DQ-NEXT:    vpaddw %ymm6, %ymm6, %ymm7
    322 ; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm5, %ymm4, %ymm4
    323 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm4, %ymm4
    324 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
    325 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
    326 ; AVX512DQ-NEXT:    vpsraw $4, %ymm0, %ymm5
    327 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
    328 ; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
    329 ; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm8
    330 ; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
    331 ; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
    332 ; AVX512DQ-NEXT:    vpaddw %ymm8, %ymm8, %ymm9
    333 ; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
    334 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
    335 ; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
    336 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
    337 ; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
    338 ; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
    339 ; AVX512DQ-NEXT:    vpsraw $2, %ymm3, %ymm4
    340 ; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
    341 ; AVX512DQ-NEXT:    vpsraw $1, %ymm3, %ymm4
    342 ; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
    343 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
    344 ; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
    345 ; AVX512DQ-NEXT:    vpsraw $4, %ymm1, %ymm4
    346 ; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
    347 ; AVX512DQ-NEXT:    vpsraw $2, %ymm1, %ymm2
    348 ; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
    349 ; AVX512DQ-NEXT:    vpsraw $1, %ymm1, %ymm2
    350 ; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
    351 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    352 ; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
    353 ; AVX512DQ-NEXT:    retq
    354 ;
    355 ; AVX512BW-LABEL: constant_shift_v64i8:
    356 ; AVX512BW:       # %bb.0:
    357 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
    358 ; AVX512BW-NEXT:    vpsraw $4, %zmm1, %zmm2
    359 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
    360 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm3[8],zmm0[9],zmm3[9],zmm0[10],zmm3[10],zmm0[11],zmm3[11],zmm0[12],zmm3[12],zmm0[13],zmm3[13],zmm0[14],zmm3[14],zmm0[15],zmm3[15],zmm0[24],zmm3[24],zmm0[25],zmm3[25],zmm0[26],zmm3[26],zmm0[27],zmm3[27],zmm0[28],zmm3[28],zmm0[29],zmm3[29],zmm0[30],zmm3[30],zmm0[31],zmm3[31],zmm0[40],zmm3[40],zmm0[41],zmm3[41],zmm0[42],zmm3[42],zmm0[43],zmm3[43],zmm0[44],zmm3[44],zmm0[45],zmm3[45],zmm0[46],zmm3[46],zmm0[47],zmm3[47],zmm0[56],zmm3[56],zmm0[57],zmm3[57],zmm0[58],zmm3[58],zmm0[59],zmm3[59],zmm0[60],zmm3[60],zmm0[61],zmm3[61],zmm0[62],zmm3[62],zmm0[63],zmm3[63]
    361 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    362 ; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm1 {%k1}
    363 ; AVX512BW-NEXT:    vpsraw $2, %zmm1, %zmm2
    364 ; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
    365 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    366 ; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm1 {%k1}
    367 ; AVX512BW-NEXT:    vpsraw $1, %zmm1, %zmm2
    368 ; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
    369 ; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
    370 ; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm1 {%k1}
    371 ; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
    372 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
    373 ; AVX512BW-NEXT:    vpsraw $4, %zmm0, %zmm2
    374 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm3[0],zmm0[1],zmm3[1],zmm0[2],zmm3[2],zmm0[3],zmm3[3],zmm0[4],zmm3[4],zmm0[5],zmm3[5],zmm0[6],zmm3[6],zmm0[7],zmm3[7],zmm0[16],zmm3[16],zmm0[17],zmm3[17],zmm0[18],zmm3[18],zmm0[19],zmm3[19],zmm0[20],zmm3[20],zmm0[21],zmm3[21],zmm0[22],zmm3[22],zmm0[23],zmm3[23],zmm0[32],zmm3[32],zmm0[33],zmm3[33],zmm0[34],zmm3[34],zmm0[35],zmm3[35],zmm0[36],zmm3[36],zmm0[37],zmm3[37],zmm0[38],zmm3[38],zmm0[39],zmm3[39],zmm0[48],zmm3[48],zmm0[49],zmm3[49],zmm0[50],zmm3[50],zmm0[51],zmm3[51],zmm0[52],zmm3[52],zmm0[53],zmm3[53],zmm0[54],zmm3[54],zmm0[55],zmm3[55]
    375 ; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
    376 ; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
    377 ; AVX512BW-NEXT:    vpsraw $2, %zmm0, %zmm2
    378 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm3, %zmm3
    379 ; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
    380 ; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
    381 ; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm2
    382 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm3, %zmm3
    383 ; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
    384 ; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
    385 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
    386 ; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
    387 ; AVX512BW-NEXT:    retq
    388   %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
    389   ret <64 x i8> %shift
    390 }
    391 
;
; Uniform Constant Shifts
;

    396 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
    397 ; ALL-LABEL: splatconstant_shift_v8i64:
    398 ; ALL:       # %bb.0:
    399 ; ALL-NEXT:    vpsraq $7, %zmm0, %zmm0
    400 ; ALL-NEXT:    retq
    401   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
    402   ret <8 x i64> %shift
    403 }
    404 
    405 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
    406 ; ALL-LABEL: splatconstant_shift_v16i32:
    407 ; ALL:       # %bb.0:
    408 ; ALL-NEXT:    vpsrad $5, %zmm0, %zmm0
    409 ; ALL-NEXT:    retq
    410   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    411   ret <16 x i32> %shift
    412 }
    413 
    414 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
    415 ; AVX512DQ-LABEL: splatconstant_shift_v32i16:
    416 ; AVX512DQ:       # %bb.0:
    417 ; AVX512DQ-NEXT:    vpsraw $3, %ymm0, %ymm0
    418 ; AVX512DQ-NEXT:    vpsraw $3, %ymm1, %ymm1
    419 ; AVX512DQ-NEXT:    retq
    420 ;
    421 ; AVX512BW-LABEL: splatconstant_shift_v32i16:
    422 ; AVX512BW:       # %bb.0:
    423 ; AVX512BW-NEXT:    vpsraw $3, %zmm0, %zmm0
    424 ; AVX512BW-NEXT:    retq
    425   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
    426   ret <32 x i16> %shift
    427 }
    428 
    429 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
    430 ; AVX512DQ-LABEL: splatconstant_shift_v64i8:
    431 ; AVX512DQ:       # %bb.0:
    432 ; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
    433 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
    434 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
    435 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    436 ; AVX512DQ-NEXT:    vpxor %ymm3, %ymm0, %ymm0
    437 ; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
    438 ; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
    439 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
    440 ; AVX512DQ-NEXT:    vpxor %ymm3, %ymm1, %ymm1
    441 ; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
    442 ; AVX512DQ-NEXT:    retq
    443 ;
    444 ; AVX512BW-LABEL: splatconstant_shift_v64i8:
    445 ; AVX512BW:       # %bb.0:
    446 ; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
    447 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
    448 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    449 ; AVX512BW-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
    450 ; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
    451 ; AVX512BW-NEXT:    retq
    452   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    453   ret <64 x i8> %shift
    454 }
    455 
    456 define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
    457 ; AVX512DQ-LABEL: ashr_const7_v64i8:
    458 ; AVX512DQ:       # %bb.0:
    459 ; AVX512DQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    460 ; AVX512DQ-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
    461 ; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
    462 ; AVX512DQ-NEXT:    retq
    463 ;
    464 ; AVX512BW-LABEL: ashr_const7_v64i8:
    465 ; AVX512BW:       # %bb.0:
    466 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
    467 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    468 ; AVX512BW-NEXT:    retq
    469   %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
    470   ret <64 x i8> %res
    471 }
    472