; (code-viewer export residue, kept as a comment:) Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
      6 
      7 ;
      8 ; Variable Rotates
      9 ;
     10 
; Variable per-lane rotate-left of <8 x i64>, written in IR as
;   (a << b) | (a >> (64 - b)).
; All four AVX512 run lines recognise the pattern and fold it to a single
; native vprolvq. (CHECK lines are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.)
     11 define <8 x i64> @var_rotate_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
     12 ; AVX512-LABEL: var_rotate_v8i64:
     13 ; AVX512:       # %bb.0:
     14 ; AVX512-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
     15 ; AVX512-NEXT:    retq
     16   %b64 = sub <8 x i64> <i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64>, %b
     17   %shl = shl <8 x i64> %a, %b
     18   %lshr = lshr <8 x i64> %a, %b64
     19   %or = or <8 x i64> %shl, %lshr
     20   ret <8 x i64> %or
     21 }
     22 
; Variable per-lane rotate-left of <16 x i32>: (a << b) | (a >> (32 - b)).
; Matched to a single native vprolvd on every AVX512 configuration.
     23 define <16 x i32> @var_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
     24 ; AVX512-LABEL: var_rotate_v16i32:
     25 ; AVX512:       # %bb.0:
     26 ; AVX512-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
     27 ; AVX512-NEXT:    retq
     28   %b32 = sub <16 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, %b
     29   %shl = shl <16 x i32> %a, %b
     30   %lshr = lshr <16 x i32> %a, %b32
     31   %or = or <16 x i32> %shl, %lshr
     32   ret <16 x i32> %or
     33 }
     34 
; Variable per-lane rotate-left of <32 x i16>. There is no variable 16-bit
; rotate, and no variable 16-bit shift below AVX512BW, so:
;  * AVX512F / AVX512VL: each 256-bit half is zero-extended to 32-bit lanes
;    (vpmovzxwd), shifted with vpsllvd/vpsrlvd, narrowed back (vpmovdw), and
;    the two shift results ORed per half.
;  * AVX512BW / AVX512VLBW: vpsllvw/vpsrlvw exist, so the IR's
;    sub/shl/lshr/or maps directly onto four full-width zmm instructions.
     35 define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
     36 ; AVX512F-LABEL: var_rotate_v32i16:
     37 ; AVX512F:       # %bb.0:
     38 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
     39 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
     40 ; AVX512F-NEXT:    vpsllvd %zmm4, %zmm0, %zmm4
     41 ; AVX512F-NEXT:    vpmovdw %zmm4, %ymm4
     42 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     43 ; AVX512F-NEXT:    vpsubw %ymm2, %ymm5, %ymm2
     44 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
     45 ; AVX512F-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
     46 ; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
     47 ; AVX512F-NEXT:    vpor %ymm0, %ymm4, %ymm0
     48 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
     49 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
     50 ; AVX512F-NEXT:    vpsllvd %zmm2, %zmm1, %zmm2
     51 ; AVX512F-NEXT:    vpmovdw %zmm2, %ymm2
     52 ; AVX512F-NEXT:    vpsubw %ymm3, %ymm5, %ymm3
     53 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
     54 ; AVX512F-NEXT:    vpsrlvd %zmm3, %zmm1, %zmm1
     55 ; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
     56 ; AVX512F-NEXT:    vpor %ymm1, %ymm2, %ymm1
     57 ; AVX512F-NEXT:    retq
     58 ;
     59 ; AVX512VL-LABEL: var_rotate_v32i16:
     60 ; AVX512VL:       # %bb.0:
     61 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
     62 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
     63 ; AVX512VL-NEXT:    vpsllvd %zmm4, %zmm0, %zmm4
     64 ; AVX512VL-NEXT:    vpmovdw %zmm4, %ymm4
     65 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     66 ; AVX512VL-NEXT:    vpsubw %ymm2, %ymm5, %ymm2
     67 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
     68 ; AVX512VL-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
     69 ; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
     70 ; AVX512VL-NEXT:    vpor %ymm0, %ymm4, %ymm0
     71 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
     72 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
     73 ; AVX512VL-NEXT:    vpsllvd %zmm2, %zmm1, %zmm2
     74 ; AVX512VL-NEXT:    vpmovdw %zmm2, %ymm2
     75 ; AVX512VL-NEXT:    vpsubw %ymm3, %ymm5, %ymm3
     76 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
     77 ; AVX512VL-NEXT:    vpsrlvd %zmm3, %zmm1, %zmm1
     78 ; AVX512VL-NEXT:    vpmovdw %zmm1, %ymm1
     79 ; AVX512VL-NEXT:    vpor %ymm1, %ymm2, %ymm1
     80 ; AVX512VL-NEXT:    retq
     81 ;
     82 ; AVX512BW-LABEL: var_rotate_v32i16:
     83 ; AVX512BW:       # %bb.0:
     84 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     85 ; AVX512BW-NEXT:    vpsubw %zmm1, %zmm2, %zmm2
     86 ; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm1
     87 ; AVX512BW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
     88 ; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
     89 ; AVX512BW-NEXT:    retq
     90 ;
     91 ; AVX512VLBW-LABEL: var_rotate_v32i16:
     92 ; AVX512VLBW:       # %bb.0:
     93 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     94 ; AVX512VLBW-NEXT:    vpsubw %zmm1, %zmm2, %zmm2
     95 ; AVX512VLBW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm1
     96 ; AVX512VLBW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
     97 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
     98 ; AVX512VLBW-NEXT:    retq
     99   %b16 = sub <32 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %b
    100   %shl = shl <32 x i16> %a, %b
    101   %lshr = lshr <32 x i16> %a, %b16
    102   %or = or <32 x i16> %shl, %lshr
    103   ret <32 x i16> %or
    104 }
    105 
; Variable per-lane rotate-left of <64 x i8>. x86 has no per-byte shift
; instructions at all, so the lowering decomposes each byte's shift amount
; bit by bit:
;  * AVX512F / AVX512VL (no BW): per 256-bit half, shift whole words by
;    4, then 2, then 1 (vpsllw/vpsrlw), vpand away the bits that crossed
;    byte boundaries, and select per byte with vpblendvb. The amount is
;    first scaled with vpsllw $5 and then doubled (vpaddb x,x) between
;    steps so each amount bit lands in the byte's sign bit for vpblendvb.
;  * AVX512BW / AVX512VLBW: same bit-serial scheme on full zmm registers,
;    with vpmovb2m extracting the sign bits into a mask register and
;    masked ops (vpblendmb / vmovdqu8 / masked vpaddb) doing the selects.
    106 define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
    107 ; AVX512F-LABEL: var_rotate_v64i8:
    108 ; AVX512F:       # %bb.0:
    109 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm4
    110 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    111 ; AVX512F-NEXT:    vpand %ymm5, %ymm4, %ymm4
    112 ; AVX512F-NEXT:    vpsllw $4, %ymm0, %ymm6
    113 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    114 ; AVX512F-NEXT:    vpand %ymm7, %ymm6, %ymm6
    115 ; AVX512F-NEXT:    vpor %ymm4, %ymm6, %ymm4
    116 ; AVX512F-NEXT:    vpsllw $5, %ymm2, %ymm2
    117 ; AVX512F-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
    118 ; AVX512F-NEXT:    vpsrlw $6, %ymm0, %ymm4
    119 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
    120 ; AVX512F-NEXT:    vpand %ymm6, %ymm4, %ymm4
    121 ; AVX512F-NEXT:    vpsllw $2, %ymm0, %ymm8
    122 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    123 ; AVX512F-NEXT:    vpand %ymm9, %ymm8, %ymm8
    124 ; AVX512F-NEXT:    vpor %ymm4, %ymm8, %ymm4
    125 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
    126 ; AVX512F-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
    127 ; AVX512F-NEXT:    vpsrlw $7, %ymm0, %ymm4
    128 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    129 ; AVX512F-NEXT:    vpand %ymm8, %ymm4, %ymm4
    130 ; AVX512F-NEXT:    vpaddb %ymm0, %ymm0, %ymm10
    131 ; AVX512F-NEXT:    vpor %ymm4, %ymm10, %ymm4
    132 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
    133 ; AVX512F-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
    134 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm2
    135 ; AVX512F-NEXT:    vpand %ymm5, %ymm2, %ymm2
    136 ; AVX512F-NEXT:    vpsllw $4, %ymm1, %ymm4
    137 ; AVX512F-NEXT:    vpand %ymm7, %ymm4, %ymm4
    138 ; AVX512F-NEXT:    vpor %ymm2, %ymm4, %ymm2
    139 ; AVX512F-NEXT:    vpsllw $5, %ymm3, %ymm3
    140 ; AVX512F-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
    141 ; AVX512F-NEXT:    vpsrlw $6, %ymm1, %ymm2
    142 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
    143 ; AVX512F-NEXT:    vpsllw $2, %ymm1, %ymm4
    144 ; AVX512F-NEXT:    vpand %ymm9, %ymm4, %ymm4
    145 ; AVX512F-NEXT:    vpor %ymm2, %ymm4, %ymm2
    146 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
    147 ; AVX512F-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
    148 ; AVX512F-NEXT:    vpsrlw $7, %ymm1, %ymm2
    149 ; AVX512F-NEXT:    vpand %ymm8, %ymm2, %ymm2
    150 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm1, %ymm4
    151 ; AVX512F-NEXT:    vpor %ymm2, %ymm4, %ymm2
    152 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
    153 ; AVX512F-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
    154 ; AVX512F-NEXT:    retq
    155 ;
    156 ; AVX512VL-LABEL: var_rotate_v64i8:
    157 ; AVX512VL:       # %bb.0:
    158 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
    159 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    160 ; AVX512VL-NEXT:    vpand %ymm5, %ymm4, %ymm4
    161 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm6
    162 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    163 ; AVX512VL-NEXT:    vpand %ymm7, %ymm6, %ymm6
    164 ; AVX512VL-NEXT:    vpor %ymm4, %ymm6, %ymm4
    165 ; AVX512VL-NEXT:    vpsllw $5, %ymm2, %ymm2
    166 ; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
    167 ; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm4
    168 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
    169 ; AVX512VL-NEXT:    vpand %ymm6, %ymm4, %ymm4
    170 ; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm8
    171 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    172 ; AVX512VL-NEXT:    vpand %ymm9, %ymm8, %ymm8
    173 ; AVX512VL-NEXT:    vpor %ymm4, %ymm8, %ymm4
    174 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
    175 ; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
    176 ; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm4
    177 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    178 ; AVX512VL-NEXT:    vpand %ymm8, %ymm4, %ymm4
    179 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm10
    180 ; AVX512VL-NEXT:    vpor %ymm4, %ymm10, %ymm4
    181 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
    182 ; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
    183 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm2
    184 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    185 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm4
    186 ; AVX512VL-NEXT:    vpand %ymm7, %ymm4, %ymm4
    187 ; AVX512VL-NEXT:    vpor %ymm2, %ymm4, %ymm2
    188 ; AVX512VL-NEXT:    vpsllw $5, %ymm3, %ymm3
    189 ; AVX512VL-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
    190 ; AVX512VL-NEXT:    vpsrlw $6, %ymm1, %ymm2
    191 ; AVX512VL-NEXT:    vpand %ymm6, %ymm2, %ymm2
    192 ; AVX512VL-NEXT:    vpsllw $2, %ymm1, %ymm4
    193 ; AVX512VL-NEXT:    vpand %ymm9, %ymm4, %ymm4
    194 ; AVX512VL-NEXT:    vpor %ymm2, %ymm4, %ymm2
    195 ; AVX512VL-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
    196 ; AVX512VL-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
    197 ; AVX512VL-NEXT:    vpsrlw $7, %ymm1, %ymm2
    198 ; AVX512VL-NEXT:    vpand %ymm8, %ymm2, %ymm2
    199 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm4
    200 ; AVX512VL-NEXT:    vpor %ymm2, %ymm4, %ymm2
    201 ; AVX512VL-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
    202 ; AVX512VL-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
    203 ; AVX512VL-NEXT:    retq
    204 ;
    205 ; AVX512BW-LABEL: var_rotate_v64i8:
    206 ; AVX512BW:       # %bb.0:
    207 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
    208 ; AVX512BW-NEXT:    vpsubb %zmm1, %zmm2, %zmm2
    209 ; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm3
    210 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    211 ; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
    212 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    213 ; AVX512BW-NEXT:    vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
    214 ; AVX512BW-NEXT:    vpsllw $2, %zmm3, %zmm4
    215 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm4, %zmm4
    216 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    217 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    218 ; AVX512BW-NEXT:    vmovdqu8 %zmm4, %zmm3 {%k1}
    219 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    220 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    221 ; AVX512BW-NEXT:    vpaddb %zmm3, %zmm3, %zmm3 {%k1}
    222 ; AVX512BW-NEXT:    vpsllw $5, %zmm2, %zmm1
    223 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm2
    224 ; AVX512BW-NEXT:    vpmovb2m %zmm2, %k1
    225 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k2
    226 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm1
    227 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    228 ; AVX512BW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k2}
    229 ; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm1
    230 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    231 ; AVX512BW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    232 ; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm1
    233 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    234 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm2, %zmm2
    235 ; AVX512BW-NEXT:    vpmovb2m %zmm2, %k1
    236 ; AVX512BW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    237 ; AVX512BW-NEXT:    vporq %zmm0, %zmm3, %zmm0
    238 ; AVX512BW-NEXT:    retq
    239 ;
    240 ; AVX512VLBW-LABEL: var_rotate_v64i8:
    241 ; AVX512VLBW:       # %bb.0:
    242 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
    243 ; AVX512VLBW-NEXT:    vpsubb %zmm1, %zmm2, %zmm2
    244 ; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm3
    245 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    246 ; AVX512VLBW-NEXT:    vpsllw $5, %zmm1, %zmm1
    247 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    248 ; AVX512VLBW-NEXT:    vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
    249 ; AVX512VLBW-NEXT:    vpsllw $2, %zmm3, %zmm4
    250 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm4, %zmm4
    251 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    252 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    253 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm4, %zmm3 {%k1}
    254 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    255 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    256 ; AVX512VLBW-NEXT:    vpaddb %zmm3, %zmm3, %zmm3 {%k1}
    257 ; AVX512VLBW-NEXT:    vpsllw $5, %zmm2, %zmm1
    258 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm2
    259 ; AVX512VLBW-NEXT:    vpmovb2m %zmm2, %k1
    260 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k2
    261 ; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm1
    262 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    263 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k2}
    264 ; AVX512VLBW-NEXT:    vpsrlw $2, %zmm0, %zmm1
    265 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    266 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    267 ; AVX512VLBW-NEXT:    vpsrlw $1, %zmm0, %zmm1
    268 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    269 ; AVX512VLBW-NEXT:    vpaddb %zmm2, %zmm2, %zmm2
    270 ; AVX512VLBW-NEXT:    vpmovb2m %zmm2, %k1
    271 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    272 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm3, %zmm0
    273 ; AVX512VLBW-NEXT:    retq
    274   %b8 = sub <64 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %b
    275   %shl = shl <64 x i8> %a, %b
    276   %lshr = lshr <64 x i8> %a, %b8
    277   %or = or <64 x i8> %shl, %lshr
    278   ret <64 x i8> %or
    279 }
    280 
    281 ;
    282 ; Uniform Variable Rotates
    283 ;
    284 
; Rotate every <8 x i64> lane by the same variable amount (lane 0 of %b
; splatted). The splat becomes vpbroadcastq and the rotate pattern still
; folds to a single vprolvq.
    285 define <8 x i64> @splatvar_rotate_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
    286 ; AVX512-LABEL: splatvar_rotate_v8i64:
    287 ; AVX512:       # %bb.0:
    288 ; AVX512-NEXT:    vpbroadcastq %xmm1, %zmm1
    289 ; AVX512-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
    290 ; AVX512-NEXT:    retq
    291   %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
    292   %splat64 = sub <8 x i64> <i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64>, %splat
    293   %shl = shl <8 x i64> %a, %splat
    294   %lshr = lshr <8 x i64> %a, %splat64
    295   %or = or <8 x i64> %shl, %lshr
    296   ret <8 x i64> %or
    297 }
    298 
; Rotate every <16 x i32> lane by the same variable amount (lane 0 of %b
; splatted): vpbroadcastd of the count followed by a single vprolvd.
    299 define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
    300 ; AVX512-LABEL: splatvar_rotate_v16i32:
    301 ; AVX512:       # %bb.0:
    302 ; AVX512-NEXT:    vpbroadcastd %xmm1, %zmm1
    303 ; AVX512-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
    304 ; AVX512-NEXT:    retq
    305   %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
    306   %splat32 = sub <16 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, %splat
    307   %shl = shl <16 x i32> %a, %splat
    308   %lshr = lshr <16 x i32> %a, %splat32
    309   %or = or <16 x i32> %shl, %lshr
    310   ret <16 x i32> %or
    311 }
    312 
; Rotate every <32 x i16> lane by the same variable amount (lane 0 of %b
; splatted). With a uniform count the scalar-count shift forms apply:
;  * AVX512F / AVX512VL: per ymm half, vpsllw/vpsrlw with the count held in
;    an xmm register (vpmovzxwq zero-extends the word count into the low
;    64 bits that the shift reads), then OR the halves.
;  * AVX512BW / AVX512VLBW: scalar-count vpsllw on the full zmm for the
;    left shift, and vpsrlvw with the splatted (16 - b) vector for the
;    right shift, combined with vporq.
    313 define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
    314 ; AVX512F-LABEL: splatvar_rotate_v32i16:
    315 ; AVX512F:       # %bb.0:
    316 ; AVX512F-NEXT:    vpbroadcastw %xmm2, %ymm3
    317 ; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
    318 ; AVX512F-NEXT:    vpsllw %xmm2, %ymm0, %ymm4
    319 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    320 ; AVX512F-NEXT:    vpsubw %ymm3, %ymm5, %ymm3
    321 ; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
    322 ; AVX512F-NEXT:    vpsrlw %xmm3, %ymm0, %ymm0
    323 ; AVX512F-NEXT:    vpor %ymm0, %ymm4, %ymm0
    324 ; AVX512F-NEXT:    vpsllw %xmm2, %ymm1, %ymm2
    325 ; AVX512F-NEXT:    vpsrlw %xmm3, %ymm1, %ymm1
    326 ; AVX512F-NEXT:    vpor %ymm1, %ymm2, %ymm1
    327 ; AVX512F-NEXT:    retq
    328 ;
    329 ; AVX512VL-LABEL: splatvar_rotate_v32i16:
    330 ; AVX512VL:       # %bb.0:
    331 ; AVX512VL-NEXT:    vpbroadcastw %xmm2, %ymm3
    332 ; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
    333 ; AVX512VL-NEXT:    vpsllw %xmm2, %ymm0, %ymm4
    334 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    335 ; AVX512VL-NEXT:    vpsubw %ymm3, %ymm5, %ymm3
    336 ; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
    337 ; AVX512VL-NEXT:    vpsrlw %xmm3, %ymm0, %ymm0
    338 ; AVX512VL-NEXT:    vpor %ymm0, %ymm4, %ymm0
    339 ; AVX512VL-NEXT:    vpsllw %xmm2, %ymm1, %ymm2
    340 ; AVX512VL-NEXT:    vpsrlw %xmm3, %ymm1, %ymm1
    341 ; AVX512VL-NEXT:    vpor %ymm1, %ymm2, %ymm1
    342 ; AVX512VL-NEXT:    retq
    343 ;
    344 ; AVX512BW-LABEL: splatvar_rotate_v32i16:
    345 ; AVX512BW:       # %bb.0:
    346 ; AVX512BW-NEXT:    vpbroadcastw %xmm1, %zmm2
    347 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    348 ; AVX512BW-NEXT:    vpsubw %zmm2, %zmm3, %zmm2
    349 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    350 ; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm1
    351 ; AVX512BW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
    352 ; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    353 ; AVX512BW-NEXT:    retq
    354 ;
    355 ; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
    356 ; AVX512VLBW:       # %bb.0:
    357 ; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %zmm2
    358 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    359 ; AVX512VLBW-NEXT:    vpsubw %zmm2, %zmm3, %zmm2
    360 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
    361 ; AVX512VLBW-NEXT:    vpsllw %xmm1, %zmm0, %zmm1
    362 ; AVX512VLBW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
    363 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    364 ; AVX512VLBW-NEXT:    retq
    365   %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
    366   %splat16 = sub <32 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %splat
    367   %shl = shl <32 x i16> %a, %splat
    368   %lshr = lshr <32 x i16> %a, %splat16
    369   %or = or <32 x i16> %shl, %lshr
    370   ret <32 x i16> %or
    371 }
    372 
    373 define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
    374 ; AVX512F-LABEL: splatvar_rotate_v64i8:
    375 ; AVX512F:       # %bb.0:
    376 ; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2
    377 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm3
    378 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    379 ; AVX512F-NEXT:    vpand %ymm4, %ymm3, %ymm3
    380 ; AVX512F-NEXT:    vpsllw $4, %ymm0, %ymm5
    381 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    382 ; AVX512F-NEXT:    vpand %ymm6, %ymm5, %ymm5
    383 ; AVX512F-NEXT:    vpor %ymm3, %ymm5, %ymm3
    384 ; AVX512F-NEXT:    vpsllw $5, %ymm2, %ymm2
    385 ; AVX512F-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
    386 ; AVX512F-NEXT:    vpsrlw $6, %ymm0, %ymm3
    387 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
    388 ; AVX512F-NEXT:    vpand %ymm5, %ymm3, %ymm3
    389 ; AVX512F-NEXT:    vpsllw $2, %ymm0, %ymm7
    390 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    391 ; AVX512F-NEXT:    vpand %ymm8, %ymm7, %ymm7
    392 ; AVX512F-NEXT:    vpor %ymm3, %ymm7, %ymm3
    393 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm2, %ymm7
    394 ; AVX512F-NEXT:    vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
    395 ; AVX512F-NEXT:    vpsrlw $7, %ymm0, %ymm3
    396 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    397 ; AVX512F-NEXT:    vpand %ymm9, %ymm3, %ymm3
    398 ; AVX512F-NEXT:    vpaddb %ymm0, %ymm0, %ymm10
    399 ; AVX512F-NEXT:    vpor %ymm3, %ymm10, %ymm3
    400 ; AVX512F-NEXT:    vpaddb %ymm7, %ymm7, %ymm10
    401 ; AVX512F-NEXT:    vpblendvb %ymm10, %ymm3, %ymm0, %ymm0
    402 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm3
    403 ; AVX512F-NEXT:    vpand %ymm4, %ymm3, %ymm3
    404 ; AVX512F-NEXT:    vpsllw $4, %ymm1, %ymm4
    405 ; AVX512F-NEXT:    vpand %ymm6, %ymm4, %ymm4
    406 ; AVX512F-NEXT:    vpor %ymm3, %ymm4, %ymm3
    407 ; AVX512F-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
    408 ; AVX512F-NEXT:    vpsrlw $6, %ymm1, %ymm2
    409 ; AVX512F-NEXT:    vpand %ymm5, %ymm2, %ymm2
    410 ; AVX512F-NEXT:    vpsllw $2, %ymm1, %ymm3
    411 ; AVX512F-NEXT:    vpand %ymm8, %ymm3, %ymm3
    412 ; AVX512F-NEXT:    vpor %ymm2, %ymm3, %ymm2
    413 ; AVX512F-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
    414 ; AVX512F-NEXT:    vpsrlw $7, %ymm1, %ymm2
    415 ; AVX512F-NEXT:    vpand %ymm9, %ymm2, %ymm2
    416 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm1, %ymm3
    417 ; AVX512F-NEXT:    vpor %ymm2, %ymm3, %ymm2
    418 ; AVX512F-NEXT:    vpblendvb %ymm10, %ymm2, %ymm1, %ymm1
    419 ; AVX512F-NEXT:    retq
    420 ;
    421 ; AVX512VL-LABEL: splatvar_rotate_v64i8:
    422 ; AVX512VL:       # %bb.0:
    423 ; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2
    424 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
    425 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    426 ; AVX512VL-NEXT:    vpand %ymm4, %ymm3, %ymm3
    427 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm5
    428 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    429 ; AVX512VL-NEXT:    vpand %ymm6, %ymm5, %ymm5
    430 ; AVX512VL-NEXT:    vpor %ymm3, %ymm5, %ymm3
    431 ; AVX512VL-NEXT:    vpsllw $5, %ymm2, %ymm2
    432 ; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
    433 ; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
    434 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
    435 ; AVX512VL-NEXT:    vpand %ymm5, %ymm3, %ymm3
    436 ; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm7
    437 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    438 ; AVX512VL-NEXT:    vpand %ymm8, %ymm7, %ymm7
    439 ; AVX512VL-NEXT:    vpor %ymm3, %ymm7, %ymm3
    440 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm7
    441 ; AVX512VL-NEXT:    vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
    442 ; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm3
    443 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    444 ; AVX512VL-NEXT:    vpand %ymm9, %ymm3, %ymm3
    445 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm10
    446 ; AVX512VL-NEXT:    vpor %ymm3, %ymm10, %ymm3
    447 ; AVX512VL-NEXT:    vpaddb %ymm7, %ymm7, %ymm10
    448 ; AVX512VL-NEXT:    vpblendvb %ymm10, %ymm3, %ymm0, %ymm0
    449 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm3
    450 ; AVX512VL-NEXT:    vpand %ymm4, %ymm3, %ymm3
    451 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm4
    452 ; AVX512VL-NEXT:    vpand %ymm6, %ymm4, %ymm4
    453 ; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
    454 ; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
    455 ; AVX512VL-NEXT:    vpsrlw $6, %ymm1, %ymm2
    456 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    457 ; AVX512VL-NEXT:    vpsllw $2, %ymm1, %ymm3
    458 ; AVX512VL-NEXT:    vpand %ymm8, %ymm3, %ymm3
    459 ; AVX512VL-NEXT:    vpor %ymm2, %ymm3, %ymm2
    460 ; AVX512VL-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
    461 ; AVX512VL-NEXT:    vpsrlw $7, %ymm1, %ymm2
    462 ; AVX512VL-NEXT:    vpand %ymm9, %ymm2, %ymm2
    463 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm3
    464 ; AVX512VL-NEXT:    vpor %ymm2, %ymm3, %ymm2
    465 ; AVX512VL-NEXT:    vpblendvb %ymm10, %ymm2, %ymm1, %ymm1
    466 ; AVX512VL-NEXT:    retq
    467 ;
    468 ; AVX512BW-LABEL: splatvar_rotate_v64i8:
    469 ; AVX512BW:       # %bb.0:
    470 ; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
    471 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
    472 ; AVX512BW-NEXT:    vpsubb %zmm1, %zmm2, %zmm2
    473 ; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm3
    474 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    475 ; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
    476 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    477 ; AVX512BW-NEXT:    vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
    478 ; AVX512BW-NEXT:    vpsllw $2, %zmm3, %zmm4
    479 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm4, %zmm4
    480 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    481 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    482 ; AVX512BW-NEXT:    vmovdqu8 %zmm4, %zmm3 {%k1}
    483 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    484 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    485 ; AVX512BW-NEXT:    vpaddb %zmm3, %zmm3, %zmm3 {%k1}
    486 ; AVX512BW-NEXT:    vpsllw $5, %zmm2, %zmm1
    487 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm2
    488 ; AVX512BW-NEXT:    vpmovb2m %zmm2, %k1
    489 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k2
    490 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm1
    491 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    492 ; AVX512BW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k2}
    493 ; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm1
    494 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    495 ; AVX512BW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    496 ; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm1
    497 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    498 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm2, %zmm2
    499 ; AVX512BW-NEXT:    vpmovb2m %zmm2, %k1
    500 ; AVX512BW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    501 ; AVX512BW-NEXT:    vporq %zmm0, %zmm3, %zmm0
    502 ; AVX512BW-NEXT:    retq
    503 ;
    504 ; AVX512VLBW-LABEL: splatvar_rotate_v64i8:
    505 ; AVX512VLBW:       # %bb.0:
    506 ; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %zmm1
    507 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
    508 ; AVX512VLBW-NEXT:    vpsubb %zmm1, %zmm2, %zmm2
    509 ; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm3
    510 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    511 ; AVX512VLBW-NEXT:    vpsllw $5, %zmm1, %zmm1
    512 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    513 ; AVX512VLBW-NEXT:    vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
    514 ; AVX512VLBW-NEXT:    vpsllw $2, %zmm3, %zmm4
    515 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm4, %zmm4
    516 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    517 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    518 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm4, %zmm3 {%k1}
    519 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    520 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    521 ; AVX512VLBW-NEXT:    vpaddb %zmm3, %zmm3, %zmm3 {%k1}
    522 ; AVX512VLBW-NEXT:    vpsllw $5, %zmm2, %zmm1
    523 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm2
    524 ; AVX512VLBW-NEXT:    vpmovb2m %zmm2, %k1
    525 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k2
    526 ; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm1
    527 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    528 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k2}
    529 ; AVX512VLBW-NEXT:    vpsrlw $2, %zmm0, %zmm1
    530 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    531 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    532 ; AVX512VLBW-NEXT:    vpsrlw $1, %zmm0, %zmm1
    533 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    534 ; AVX512VLBW-NEXT:    vpaddb %zmm2, %zmm2, %zmm2
    535 ; AVX512VLBW-NEXT:    vpmovb2m %zmm2, %k1
    536 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1}
    537 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm3, %zmm0
    538 ; AVX512VLBW-NEXT:    retq
    539   %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
    540   %splat8 = sub <64 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %splat
    541   %shl = shl <64 x i8> %a, %splat
    542   %lshr = lshr <64 x i8> %a, %splat8
    543   %or = or <64 x i8> %shl, %lshr
    544   ret <64 x i8> %or
    545 }
    546 
    547 ;
    548 ; Constant Rotates
    549 ;
    550 
    551 define <8 x i64> @constant_rotate_v8i64(<8 x i64> %a) nounwind {
        ; Non-uniform per-lane rotate amounts <4,14,50,60,...>: the shl n | lshr (64-n)
        ; pattern below is recognized as a rotate and lowered to a single vprolvq
        ; (variable rotate-left) with the amount vector taken from a constant-pool
        ; memory operand, identically on every AVX512 run line.
    552 ; AVX512-LABEL: constant_rotate_v8i64:
    553 ; AVX512:       # %bb.0:
    554 ; AVX512-NEXT:    vprolvq {{.*}}(%rip), %zmm0, %zmm0
    555 ; AVX512-NEXT:    retq
    556   %shl = shl <8 x i64> %a, <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>
    557   %lshr = lshr <8 x i64> %a, <i64 60, i64 50, i64 14, i64 4, i64 60, i64 50, i64 14, i64 4>
    558   %or = or <8 x i64> %shl, %lshr
    559   ret <8 x i64> %or
    560 }
    561 
    562 define <16 x i32> @constant_rotate_v16i32(<16 x i32> %a) nounwind {
        ; Non-uniform i32 rotate amounts <4..11>: shl n | lshr (32-n) is matched to a
        ; single vprolvd with the per-lane amounts loaded from the constant pool, on
        ; every AVX512 run line.
    563 ; AVX512-LABEL: constant_rotate_v16i32:
    564 ; AVX512:       # %bb.0:
    565 ; AVX512-NEXT:    vprolvd {{.*}}(%rip), %zmm0, %zmm0
    566 ; AVX512-NEXT:    retq
    567   %shl = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
    568   %lshr = lshr <16 x i32> %a, <i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21>
    569   %or = or <16 x i32> %shl, %lshr
    570   ret <16 x i32> %or
    571 }
    572 
    573 define <32 x i16> @constant_rotate_v32i16(<32 x i16> %a) nounwind {
        ; There is no i16 rotate instruction. Without BW the 512-bit vector is split
        ; into two ymm halves; each half is rotated by multiplying with the shared
        ; powers-of-two vector [1,2,4,...,32768]: vpmullw produces the left shift
        ; (x * 2^n == x << n) and vpmulhuw produces the high half of the same product
        ; (x >> (16-n)), merged with vpor. With BW, 512-bit variable word shifts
        ; vpsllvw/vpsrlvw are available and the whole zmm is handled at once.
    574 ; AVX512F-LABEL: constant_rotate_v32i16:
    575 ; AVX512F:       # %bb.0:
    576 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
    577 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm3
    578 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
    579 ; AVX512F-NEXT:    vpor %ymm3, %ymm0, %ymm0
    580 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
    581 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
    582 ; AVX512F-NEXT:    vpor %ymm3, %ymm1, %ymm1
    583 ; AVX512F-NEXT:    retq
    584 ;
    585 ; AVX512VL-LABEL: constant_rotate_v32i16:
    586 ; AVX512VL:       # %bb.0:
    587 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
    588 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm3
    589 ; AVX512VL-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
    590 ; AVX512VL-NEXT:    vpor %ymm3, %ymm0, %ymm0
    591 ; AVX512VL-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm3
    592 ; AVX512VL-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
    593 ; AVX512VL-NEXT:    vpor %ymm3, %ymm1, %ymm1
    594 ; AVX512VL-NEXT:    retq
    595 ;
    596 ; AVX512BW-LABEL: constant_rotate_v32i16:
    597 ; AVX512BW:       # %bb.0:
    598 ; AVX512BW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm1
    599 ; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
    600 ; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    601 ; AVX512BW-NEXT:    retq
    602 ;
    603 ; AVX512VLBW-LABEL: constant_rotate_v32i16:
    604 ; AVX512VLBW:       # %bb.0:
    605 ; AVX512VLBW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm1
    606 ; AVX512VLBW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
    607 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    608 ; AVX512VLBW-NEXT:    retq
    609   %shl = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
    610   %lshr = lshr <32 x i16> %a, <i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1>
    611   %or = or <32 x i16> %shl, %lshr
    612   ret <32 x i16> %or
    613 }
    614 
    615 define <64 x i8> @constant_rotate_v64i8(<64 x i8> %a) nounwind {
        ; Bytes have neither a rotate nor byte-granular shifts. Without BW each ymm
        ; half runs a vpblendvb ladder: each step does a word-sized vpsllw/vpsrlw by
        ; 4/2/1, masks off the bits that crossed byte boundaries with vpand
        ; (240/15, 252/3, and 1 for the srl-by-7 step), ors the two directions, and
        ; per-byte selects shifted vs. unshifted lanes using the sign bits of a
        ; precomputed amount mask [8192,24640,...] that is doubled (vpaddb) between
        ; steps. With BW the same ladder runs on the full zmm using k-register
        ; blends (vpmovb2m + vpblendmb / vmovdqu8 {%k}), computing the shl ladder
        ; and the lshr ladder separately and combining them with a final vporq.
    616 ; AVX512F-LABEL: constant_rotate_v64i8:
    617 ; AVX512F:       # %bb.0:
    618 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm2
    619 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    620 ; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
    621 ; AVX512F-NEXT:    vpsllw $4, %ymm0, %ymm4
    622 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    623 ; AVX512F-NEXT:    vpand %ymm5, %ymm4, %ymm4
    624 ; AVX512F-NEXT:    vpor %ymm2, %ymm4, %ymm2
    625 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
    626 ; AVX512F-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
    627 ; AVX512F-NEXT:    vpsrlw $6, %ymm0, %ymm2
    628 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
    629 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
    630 ; AVX512F-NEXT:    vpsllw $2, %ymm0, %ymm7
    631 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    632 ; AVX512F-NEXT:    vpand %ymm8, %ymm7, %ymm7
    633 ; AVX512F-NEXT:    vpor %ymm2, %ymm7, %ymm2
    634 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm4, %ymm7
    635 ; AVX512F-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
    636 ; AVX512F-NEXT:    vpsrlw $7, %ymm0, %ymm2
    637 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    638 ; AVX512F-NEXT:    vpand %ymm9, %ymm2, %ymm2
    639 ; AVX512F-NEXT:    vpaddb %ymm0, %ymm0, %ymm10
    640 ; AVX512F-NEXT:    vpor %ymm2, %ymm10, %ymm2
    641 ; AVX512F-NEXT:    vpaddb %ymm7, %ymm7, %ymm10
    642 ; AVX512F-NEXT:    vpblendvb %ymm10, %ymm2, %ymm0, %ymm0
    643 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm2
    644 ; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
    645 ; AVX512F-NEXT:    vpsllw $4, %ymm1, %ymm3
    646 ; AVX512F-NEXT:    vpand %ymm5, %ymm3, %ymm3
    647 ; AVX512F-NEXT:    vpor %ymm2, %ymm3, %ymm2
    648 ; AVX512F-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
    649 ; AVX512F-NEXT:    vpsrlw $6, %ymm1, %ymm2
    650 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
    651 ; AVX512F-NEXT:    vpsllw $2, %ymm1, %ymm3
    652 ; AVX512F-NEXT:    vpand %ymm8, %ymm3, %ymm3
    653 ; AVX512F-NEXT:    vpor %ymm2, %ymm3, %ymm2
    654 ; AVX512F-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
    655 ; AVX512F-NEXT:    vpsrlw $7, %ymm1, %ymm2
    656 ; AVX512F-NEXT:    vpand %ymm9, %ymm2, %ymm2
    657 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm1, %ymm3
    658 ; AVX512F-NEXT:    vpor %ymm2, %ymm3, %ymm2
    659 ; AVX512F-NEXT:    vpblendvb %ymm10, %ymm2, %ymm1, %ymm1
    660 ; AVX512F-NEXT:    retq
    661 ;
    662 ; AVX512VL-LABEL: constant_rotate_v64i8:
    663 ; AVX512VL:       # %bb.0:
    664 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
    665 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    666 ; AVX512VL-NEXT:    vpand %ymm3, %ymm2, %ymm2
    667 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm4
    668 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    669 ; AVX512VL-NEXT:    vpand %ymm5, %ymm4, %ymm4
    670 ; AVX512VL-NEXT:    vpor %ymm2, %ymm4, %ymm2
    671 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
    672 ; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
    673 ; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm2
    674 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
    675 ; AVX512VL-NEXT:    vpand %ymm6, %ymm2, %ymm2
    676 ; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm7
    677 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    678 ; AVX512VL-NEXT:    vpand %ymm8, %ymm7, %ymm7
    679 ; AVX512VL-NEXT:    vpor %ymm2, %ymm7, %ymm2
    680 ; AVX512VL-NEXT:    vpaddb %ymm4, %ymm4, %ymm7
    681 ; AVX512VL-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
    682 ; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm2
    683 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    684 ; AVX512VL-NEXT:    vpand %ymm9, %ymm2, %ymm2
    685 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm10
    686 ; AVX512VL-NEXT:    vpor %ymm2, %ymm10, %ymm2
    687 ; AVX512VL-NEXT:    vpaddb %ymm7, %ymm7, %ymm10
    688 ; AVX512VL-NEXT:    vpblendvb %ymm10, %ymm2, %ymm0, %ymm0
    689 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm2
    690 ; AVX512VL-NEXT:    vpand %ymm3, %ymm2, %ymm2
    691 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm3
    692 ; AVX512VL-NEXT:    vpand %ymm5, %ymm3, %ymm3
    693 ; AVX512VL-NEXT:    vpor %ymm2, %ymm3, %ymm2
    694 ; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
    695 ; AVX512VL-NEXT:    vpsrlw $6, %ymm1, %ymm2
    696 ; AVX512VL-NEXT:    vpand %ymm6, %ymm2, %ymm2
    697 ; AVX512VL-NEXT:    vpsllw $2, %ymm1, %ymm3
    698 ; AVX512VL-NEXT:    vpand %ymm8, %ymm3, %ymm3
    699 ; AVX512VL-NEXT:    vpor %ymm2, %ymm3, %ymm2
    700 ; AVX512VL-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
    701 ; AVX512VL-NEXT:    vpsrlw $7, %ymm1, %ymm2
    702 ; AVX512VL-NEXT:    vpand %ymm9, %ymm2, %ymm2
    703 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm3
    704 ; AVX512VL-NEXT:    vpor %ymm2, %ymm3, %ymm2
    705 ; AVX512VL-NEXT:    vpblendvb %ymm10, %ymm2, %ymm1, %ymm1
    706 ; AVX512VL-NEXT:    retq
    707 ;
    708 ; AVX512BW-LABEL: constant_rotate_v64i8:
    709 ; AVX512BW:       # %bb.0:
    710 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
    711 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    712 ; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
    713 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
    714 ; AVX512BW-NEXT:    vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
    715 ; AVX512BW-NEXT:    vpsllw $2, %zmm2, %zmm3
    716 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    717 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    718 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    719 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    720 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    721 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    722 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm2, %zmm2 {%k1}
    723 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536]
    724 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    725 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm3
    726 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    727 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    728 ; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm3
    729 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    730 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    731 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    732 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    733 ; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm3
    734 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    735 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    736 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    737 ; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    738 ; AVX512BW-NEXT:    vporq %zmm0, %zmm2, %zmm0
    739 ; AVX512BW-NEXT:    retq
    740 ;
    741 ; AVX512VLBW-LABEL: constant_rotate_v64i8:
    742 ; AVX512VLBW:       # %bb.0:
    743 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
    744 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    745 ; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm2
    746 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
    747 ; AVX512VLBW-NEXT:    vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
    748 ; AVX512VLBW-NEXT:    vpsllw $2, %zmm2, %zmm3
    749 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    750 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    751 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    752 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
    753 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    754 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    755 ; AVX512VLBW-NEXT:    vpaddb %zmm2, %zmm2, %zmm2 {%k1}
    756 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536]
    757 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    758 ; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm3
    759 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    760 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    761 ; AVX512VLBW-NEXT:    vpsrlw $2, %zmm0, %zmm3
    762 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    763 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    764 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    765 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    766 ; AVX512VLBW-NEXT:    vpsrlw $1, %zmm0, %zmm3
    767 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm3, %zmm3
    768 ; AVX512VLBW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
    769 ; AVX512VLBW-NEXT:    vpmovb2m %zmm1, %k1
    770 ; AVX512VLBW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
    771 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm2, %zmm0
    772 ; AVX512VLBW-NEXT:    retq
    773   %shl = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>
    774   %lshr = lshr <64 x i8> %a, <i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
    775   %or = or <64 x i8> %shl, %lshr
    776   ret <64 x i8> %or
    777 }
    778 
    779 ;
    780 ; Uniform Constant Rotates
    781 ;
    782 
    783 define <8 x i64> @splatconstant_rotate_v8i64(<8 x i64> %a) nounwind {
        ; Uniform rotate amount (14 in every lane): shl 14 | lshr 50 folds to the
        ; immediate form vprolq on all AVX512 run lines.
    784 ; AVX512-LABEL: splatconstant_rotate_v8i64:
    785 ; AVX512:       # %bb.0:
    786 ; AVX512-NEXT:    vprolq $14, %zmm0, %zmm0
    787 ; AVX512-NEXT:    retq
    788   %shl = shl <8 x i64> %a, <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>
    789   %lshr = lshr <8 x i64> %a, <i64 50, i64 50, i64 50, i64 50, i64 50, i64 50, i64 50, i64 50>
    790   %or = or <8 x i64> %shl, %lshr
    791   ret <8 x i64> %or
    792 }
    793 
    794 define <16 x i32> @splatconstant_rotate_v16i32(<16 x i32> %a) nounwind {
        ; Uniform i32 rotate amount (4 in every lane): shl 4 | lshr 28 folds to the
        ; immediate form vprold on all AVX512 run lines.
    795 ; AVX512-LABEL: splatconstant_rotate_v16i32:
    796 ; AVX512:       # %bb.0:
    797 ; AVX512-NEXT:    vprold $4, %zmm0, %zmm0
    798 ; AVX512-NEXT:    retq
    799   %shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    800   %lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
    801   %or = or <16 x i32> %shl, %lshr
    802   ret <16 x i32> %or
    803 }
    804 
    805 define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind {
        ; Uniform i16 rotate by 7. No i16 rotate instruction exists: without BW the
        ; vector is split into two ymm halves, each built from vpsllw $7 / vpsrlw $9
        ; / vpor; with BW the same three-instruction sequence runs once on the zmm.
    806 ; AVX512F-LABEL: splatconstant_rotate_v32i16:
    807 ; AVX512F:       # %bb.0:
    808 ; AVX512F-NEXT:    vpsrlw $9, %ymm0, %ymm2
    809 ; AVX512F-NEXT:    vpsllw $7, %ymm0, %ymm0
    810 ; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
    811 ; AVX512F-NEXT:    vpsrlw $9, %ymm1, %ymm2
    812 ; AVX512F-NEXT:    vpsllw $7, %ymm1, %ymm1
    813 ; AVX512F-NEXT:    vpor %ymm2, %ymm1, %ymm1
    814 ; AVX512F-NEXT:    retq
    815 ;
    816 ; AVX512VL-LABEL: splatconstant_rotate_v32i16:
    817 ; AVX512VL:       # %bb.0:
    818 ; AVX512VL-NEXT:    vpsrlw $9, %ymm0, %ymm2
    819 ; AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0
    820 ; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
    821 ; AVX512VL-NEXT:    vpsrlw $9, %ymm1, %ymm2
    822 ; AVX512VL-NEXT:    vpsllw $7, %ymm1, %ymm1
    823 ; AVX512VL-NEXT:    vpor %ymm2, %ymm1, %ymm1
    824 ; AVX512VL-NEXT:    retq
    825 ;
    826 ; AVX512BW-LABEL: splatconstant_rotate_v32i16:
    827 ; AVX512BW:       # %bb.0:
    828 ; AVX512BW-NEXT:    vpsllw $7, %zmm0, %zmm1
    829 ; AVX512BW-NEXT:    vpsrlw $9, %zmm0, %zmm0
    830 ; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    831 ; AVX512BW-NEXT:    retq
    832 ;
    833 ; AVX512VLBW-LABEL: splatconstant_rotate_v32i16:
    834 ; AVX512VLBW:       # %bb.0:
    835 ; AVX512VLBW-NEXT:    vpsllw $7, %zmm0, %zmm1
    836 ; AVX512VLBW-NEXT:    vpsrlw $9, %zmm0, %zmm0
    837 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    838 ; AVX512VLBW-NEXT:    retq
    839   %shl = shl <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
    840   %lshr = lshr <32 x i16> %a, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
    841   %or = or <32 x i16> %shl, %lshr
    842   ret <32 x i16> %or
    843 }
    844 
    845 define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
        ; Uniform byte rotate by 4. Bytes lack native shifts, so word-sized
        ; vpsllw $4 / vpsrlw $4 are used and vpand clears the bits shifted across
        ; byte boundaries (240 keeps the high nibble of the shl, 15 the low nibble
        ; of the lshr) before the vpor merge. Without BW this runs per ymm half;
        ; with BW it runs once on the zmm with the masks taken from memory.
    846 ; AVX512F-LABEL: splatconstant_rotate_v64i8:
    847 ; AVX512F:       # %bb.0:
    848 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm2
    849 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    850 ; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
    851 ; AVX512F-NEXT:    vpsllw $4, %ymm0, %ymm0
    852 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    853 ; AVX512F-NEXT:    vpand %ymm4, %ymm0, %ymm0
    854 ; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
    855 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm2
    856 ; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
    857 ; AVX512F-NEXT:    vpsllw $4, %ymm1, %ymm1
    858 ; AVX512F-NEXT:    vpand %ymm4, %ymm1, %ymm1
    859 ; AVX512F-NEXT:    vpor %ymm2, %ymm1, %ymm1
    860 ; AVX512F-NEXT:    retq
    861 ;
    862 ; AVX512VL-LABEL: splatconstant_rotate_v64i8:
    863 ; AVX512VL:       # %bb.0:
    864 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
    865 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    866 ; AVX512VL-NEXT:    vpand %ymm3, %ymm2, %ymm2
    867 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
    868 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
    869 ; AVX512VL-NEXT:    vpand %ymm4, %ymm0, %ymm0
    870 ; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
    871 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm2
    872 ; AVX512VL-NEXT:    vpand %ymm3, %ymm2, %ymm2
    873 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm1
    874 ; AVX512VL-NEXT:    vpand %ymm4, %ymm1, %ymm1
    875 ; AVX512VL-NEXT:    vpor %ymm2, %ymm1, %ymm1
    876 ; AVX512VL-NEXT:    retq
    877 ;
    878 ; AVX512BW-LABEL: splatconstant_rotate_v64i8:
    879 ; AVX512BW:       # %bb.0:
    880 ; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm1
    881 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    882 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    883 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
    884 ; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    885 ; AVX512BW-NEXT:    retq
    886 ;
    887 ; AVX512VLBW-LABEL: splatconstant_rotate_v64i8:
    888 ; AVX512VLBW:       # %bb.0:
    889 ; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm1
    890 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    891 ; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    892 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
    893 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    894 ; AVX512VLBW-NEXT:    retq
    895   %shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
    896   %lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
    897   %or = or <64 x i8> %shl, %lshr
    898   ret <64 x i8> %or
    899 }
    900 
    901 ;
    902 ; Masked Uniform Constant Rotates
    903 ;
    904 
    905 define <8 x i64> @splatconstant_rotate_mask_v8i64(<8 x i64> %a) nounwind {
        ; Rotate-then-mask: the masked shl/lshr halves are recognized as a rotate by
        ; 15 (vprolq $15) followed by a single post-rotate vpandq — the two separate
        ; lane masks in the IR are merged into one constant-pool mask.
    906 ; AVX512-LABEL: splatconstant_rotate_mask_v8i64:
    907 ; AVX512:       # %bb.0:
    908 ; AVX512-NEXT:    vprolq $15, %zmm0, %zmm0
    909 ; AVX512-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
    910 ; AVX512-NEXT:    retq
    911   %shl = shl <8 x i64> %a, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
    912   %lshr = lshr <8 x i64> %a, <i64 49, i64 49, i64 49, i64 49, i64 49, i64 49, i64 49, i64 49>
    913   %rmask = and <8 x i64> %lshr, <i64 255, i64 127, i64 127, i64 255, i64 255, i64 127, i64 127, i64 255>
    914   %lmask = and <8 x i64> %shl, <i64 33, i64 65, i64 129, i64 257, i64 33, i64 65, i64 129, i64 257>
    915   %or = or <8 x i64> %lmask, %rmask
    916   ret <8 x i64> %or
    917 }
    918 
    919 define <16 x i32> @splatconstant_rotate_mask_v16i32(<16 x i32> %a) nounwind {
        ; i32 variant of rotate-then-mask: lowered to vprold $4 plus a single merged
        ; vpandq from the constant pool on every AVX512 run line.
    920 ; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
    921 ; AVX512:       # %bb.0:
    922 ; AVX512-NEXT:    vprold $4, %zmm0, %zmm0
    923 ; AVX512-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
    924 ; AVX512-NEXT:    retq
    925   %shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    926   %lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
    927   %rmask = and <16 x i32> %lshr, <i32 3, i32 7, i32 15, i32 31, i32 63, i32 127, i32 255, i32 511, i32 3, i32 7, i32 15, i32 31, i32 63, i32 127, i32 255, i32 511>
    928   %lmask = and <16 x i32> %shl, <i32 511, i32 255, i32 127, i32 63, i32 31, i32 15, i32 7, i32 3, i32 511, i32 255, i32 127, i32 63, i32 31, i32 15, i32 7, i32 3>
    929   %or = or <16 x i32> %lmask, %rmask
    930   ret <16 x i32> %or
    931 }
    932 
    933 define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
        ; i16 rotate-then-mask. Without BW each ymm half does vpsllw $5 / vpsrlw $11
        ; / vpor and the two IR masks are folded into one vpand with the splat-55
        ; constant. With BW the zmm shl/lshr halves are each masked separately with
        ; vpandq from memory before the final vporq.
    934 ; AVX512F-LABEL: splatconstant_rotate_mask_v32i16:
    935 ; AVX512F:       # %bb.0:
    936 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55]
    937 ; AVX512F-NEXT:    vpsrlw $11, %ymm0, %ymm3
    938 ; AVX512F-NEXT:    vpsllw $5, %ymm0, %ymm0
    939 ; AVX512F-NEXT:    vpor %ymm3, %ymm0, %ymm0
    940 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
    941 ; AVX512F-NEXT:    vpsrlw $11, %ymm1, %ymm3
    942 ; AVX512F-NEXT:    vpsllw $5, %ymm1, %ymm1
    943 ; AVX512F-NEXT:    vpor %ymm3, %ymm1, %ymm1
    944 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
    945 ; AVX512F-NEXT:    retq
    946 ;
    947 ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i16:
    948 ; AVX512VL:       # %bb.0:
    949 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55]
    950 ; AVX512VL-NEXT:    vpsrlw $11, %ymm0, %ymm3
    951 ; AVX512VL-NEXT:    vpsllw $5, %ymm0, %ymm0
    952 ; AVX512VL-NEXT:    vpor %ymm3, %ymm0, %ymm0
    953 ; AVX512VL-NEXT:    vpand %ymm2, %ymm0, %ymm0
    954 ; AVX512VL-NEXT:    vpsrlw $11, %ymm1, %ymm3
    955 ; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
    956 ; AVX512VL-NEXT:    vpor %ymm3, %ymm1, %ymm1
    957 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm1
    958 ; AVX512VL-NEXT:    retq
    959 ;
    960 ; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16:
    961 ; AVX512BW:       # %bb.0:
    962 ; AVX512BW-NEXT:    vpsllw $5, %zmm0, %zmm1
    963 ; AVX512BW-NEXT:    vpsrlw $11, %zmm0, %zmm0
    964 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
    965 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    966 ; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    967 ; AVX512BW-NEXT:    retq
    968 ;
    969 ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i16:
    970 ; AVX512VLBW:       # %bb.0:
    971 ; AVX512VLBW-NEXT:    vpsllw $5, %zmm0, %zmm1
    972 ; AVX512VLBW-NEXT:    vpsrlw $11, %zmm0, %zmm0
    973 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
    974 ; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
    975 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
    976 ; AVX512VLBW-NEXT:    retq
    977   %shl = shl <32 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
    978   %lshr = lshr <32 x i16> %a, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
    979   %rmask = and <32 x i16> %lshr, <i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55>
    980   %lmask = and <32 x i16> %shl, <i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33>
    981   %or = or <32 x i16> %lmask, %rmask
    982   ret <32 x i16> %or
    983 }
    984 
; Rotate-left-by-4 of each i8 lane written as (shl 4) | (lshr 4), but with the
; two halves AND-masked by *different* constants before the OR (0x21 = 33 on
; the shifted-left half, 0x37 = 55 on the shifted-right half), so the pattern
; must lower as shift+mask+or rather than fold to a plain rotate.
; NOTE(review): the AVX512F/AVX512VL check lines operate on ymm halves while
; the AVX512BW/AVX512VLBW lines use a single zmm — presumably because 512-bit
; byte shifts require AVX512BW; confirm against the ISA feature docs.
define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39]
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm5
; AVX512F-NEXT:    vpand %ymm3, %ymm5, %ymm3
; AVX512F-NEXT:    vpsllw $4, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX512F-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39]
; AVX512VL-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm3, %ymm5, %ymm3
; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
; AVX512VLBW-NEXT:    retq
; Splat shift amounts: every lane shifted left and right by 4.
  %shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
; Asymmetric masks (0x37 on the lshr half, 0x21 on the shl half) keep this
; from being a pure rotate-by-4.
  %rmask = and <64 x i8> %lshr, <i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55>
  %lmask = and <64 x i8> %shl, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
  %or = or <64 x i8> %lmask, %rmask
  ret <64 x i8> %or
}
   1052