Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
      4 
      5 ;
      6 ; udiv by 7
      7 ;
      8 
      9 define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
        ; Codegen test: unsigned divide of all eight i64 lanes of %a by the
        ; constant 7. Both RUN configurations share the expected output below
        ; (it is written against the common check prefix).
        ;
        ; Expected shape: the divide is performed one lane at a time in scalar
        ; registers. Each lane is pulled out of the vector with vextracti32x4 /
        ; vpextrq / vmovq, then divided without a div instruction:
        ;   hi = high 64 bits of (lane * 0x2492492492492493)   (mulq, result in rdx)
        ;   q  = (((lane - hi) >> 1) + hi) >> 2
        ; The eight quotients are reassembled with vpunpcklqdq, vinserti128
        ; and vinserti64x4.
        ;
        ; The check lines are autogenerated (see the note on the first line of
        ; the test); regenerate them with the script rather than editing by hand.
     10 ; AVX-LABEL: test_div7_8i64:
     11 ; AVX:       # BB#0:
     12 ; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
     13 ; AVX-NEXT:    vpextrq $1, %xmm1, %rcx
     14 ; AVX-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
     15 ; AVX-NEXT:    movq %rcx, %rax
     16 ; AVX-NEXT:    mulq %rsi
     17 ; AVX-NEXT:    subq %rdx, %rcx
     18 ; AVX-NEXT:    shrq %rcx
     19 ; AVX-NEXT:    addq %rdx, %rcx
     20 ; AVX-NEXT:    shrq $2, %rcx
     21 ; AVX-NEXT:    vmovq %rcx, %xmm2
     22 ; AVX-NEXT:    vmovq %xmm1, %rcx
     23 ; AVX-NEXT:    movq %rcx, %rax
     24 ; AVX-NEXT:    mulq %rsi
     25 ; AVX-NEXT:    subq %rdx, %rcx
     26 ; AVX-NEXT:    shrq %rcx
     27 ; AVX-NEXT:    addq %rdx, %rcx
     28 ; AVX-NEXT:    shrq $2, %rcx
     29 ; AVX-NEXT:    vmovq %rcx, %xmm1
     30 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
     31 ; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
     32 ; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
     33 ; AVX-NEXT:    movq %rcx, %rax
     34 ; AVX-NEXT:    mulq %rsi
     35 ; AVX-NEXT:    subq %rdx, %rcx
     36 ; AVX-NEXT:    shrq %rcx
     37 ; AVX-NEXT:    addq %rdx, %rcx
     38 ; AVX-NEXT:    shrq $2, %rcx
     39 ; AVX-NEXT:    vmovq %rcx, %xmm3
     40 ; AVX-NEXT:    vmovq %xmm2, %rcx
     41 ; AVX-NEXT:    movq %rcx, %rax
     42 ; AVX-NEXT:    mulq %rsi
     43 ; AVX-NEXT:    subq %rdx, %rcx
     44 ; AVX-NEXT:    shrq %rcx
     45 ; AVX-NEXT:    addq %rdx, %rcx
     46 ; AVX-NEXT:    shrq $2, %rcx
     47 ; AVX-NEXT:    vmovq %rcx, %xmm2
     48 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
     49 ; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
     50 ; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
     51 ; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
     52 ; AVX-NEXT:    movq %rcx, %rax
     53 ; AVX-NEXT:    mulq %rsi
     54 ; AVX-NEXT:    subq %rdx, %rcx
     55 ; AVX-NEXT:    shrq %rcx
     56 ; AVX-NEXT:    addq %rdx, %rcx
     57 ; AVX-NEXT:    shrq $2, %rcx
     58 ; AVX-NEXT:    vmovq %rcx, %xmm3
     59 ; AVX-NEXT:    vmovq %xmm2, %rcx
     60 ; AVX-NEXT:    movq %rcx, %rax
     61 ; AVX-NEXT:    mulq %rsi
     62 ; AVX-NEXT:    subq %rdx, %rcx
     63 ; AVX-NEXT:    shrq %rcx
     64 ; AVX-NEXT:    addq %rdx, %rcx
     65 ; AVX-NEXT:    shrq $2, %rcx
     66 ; AVX-NEXT:    vmovq %rcx, %xmm2
     67 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
     68 ; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
     69 ; AVX-NEXT:    movq %rcx, %rax
     70 ; AVX-NEXT:    mulq %rsi
     71 ; AVX-NEXT:    subq %rdx, %rcx
     72 ; AVX-NEXT:    shrq %rcx
     73 ; AVX-NEXT:    addq %rdx, %rcx
     74 ; AVX-NEXT:    shrq $2, %rcx
     75 ; AVX-NEXT:    vmovq %rcx, %xmm3
     76 ; AVX-NEXT:    vmovq %xmm0, %rcx
     77 ; AVX-NEXT:    movq %rcx, %rax
     78 ; AVX-NEXT:    mulq %rsi
     79 ; AVX-NEXT:    subq %rdx, %rcx
     80 ; AVX-NEXT:    shrq %rcx
     81 ; AVX-NEXT:    addq %rdx, %rcx
     82 ; AVX-NEXT:    shrq $2, %rcx
     83 ; AVX-NEXT:    vmovq %rcx, %xmm0
     84 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
     85 ; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
     86 ; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
     87 ; AVX-NEXT:    retq
        ; IR under test: lane-wise unsigned division by a splat of 7.
     88   %res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
     89   ret <8 x i64> %res
     90 }
     91 
     92 define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind {
        ; Codegen test: unsigned divide of all sixteen i32 lanes of %a by the
        ; constant 7. Both RUN configurations share the expected output below
        ; (it is written against the common check prefix).
        ;
        ; Expected shape: the divide is performed one lane at a time in scalar
        ; registers. Each lane is pulled out with vextracti32x4 / vpextrd /
        ; vmovd, then divided without a div instruction:
        ;   hi = bits 63..32 of (lane * 613566757)   (imulq, then shrq $32;
        ;                                             613566757 = 0x24924925)
        ;   q  = (((lane - hi) >> 1) + hi) >> 2      (32-bit sub/shr/add/shr)
        ; Each group of four quotients is rebuilt with vmovd + vpinsrd, and the
        ; four 128-bit groups are joined with vinserti128 and vinserti64x4.
        ;
        ; The check lines are autogenerated (see the note on the first line of
        ; the test); regenerate them with the script rather than editing by hand.
     93 ; AVX-LABEL: test_div7_16i32:
     94 ; AVX:       # BB#0:
     95 ; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
     96 ; AVX-NEXT:    vpextrd $1, %xmm1, %eax
     97 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
     98 ; AVX-NEXT:    shrq $32, %rcx
     99 ; AVX-NEXT:    subl %ecx, %eax
    100 ; AVX-NEXT:    shrl %eax
    101 ; AVX-NEXT:    addl %ecx, %eax
    102 ; AVX-NEXT:    shrl $2, %eax
    103 ; AVX-NEXT:    vmovd %xmm1, %ecx
    104 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
    105 ; AVX-NEXT:    shrq $32, %rdx
    106 ; AVX-NEXT:    subl %edx, %ecx
    107 ; AVX-NEXT:    shrl %ecx
    108 ; AVX-NEXT:    addl %edx, %ecx
    109 ; AVX-NEXT:    shrl $2, %ecx
    110 ; AVX-NEXT:    vmovd %ecx, %xmm2
    111 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
    112 ; AVX-NEXT:    vpextrd $2, %xmm1, %eax
    113 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    114 ; AVX-NEXT:    shrq $32, %rcx
    115 ; AVX-NEXT:    subl %ecx, %eax
    116 ; AVX-NEXT:    shrl %eax
    117 ; AVX-NEXT:    addl %ecx, %eax
    118 ; AVX-NEXT:    shrl $2, %eax
    119 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
    120 ; AVX-NEXT:    vpextrd $3, %xmm1, %eax
    121 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    122 ; AVX-NEXT:    shrq $32, %rcx
    123 ; AVX-NEXT:    subl %ecx, %eax
    124 ; AVX-NEXT:    shrl %eax
    125 ; AVX-NEXT:    addl %ecx, %eax
    126 ; AVX-NEXT:    shrl $2, %eax
    127 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
    128 ; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
    129 ; AVX-NEXT:    vpextrd $1, %xmm2, %eax
    130 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    131 ; AVX-NEXT:    shrq $32, %rcx
    132 ; AVX-NEXT:    subl %ecx, %eax
    133 ; AVX-NEXT:    shrl %eax
    134 ; AVX-NEXT:    addl %ecx, %eax
    135 ; AVX-NEXT:    shrl $2, %eax
    136 ; AVX-NEXT:    vmovd %xmm2, %ecx
    137 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
    138 ; AVX-NEXT:    shrq $32, %rdx
    139 ; AVX-NEXT:    subl %edx, %ecx
    140 ; AVX-NEXT:    shrl %ecx
    141 ; AVX-NEXT:    addl %edx, %ecx
    142 ; AVX-NEXT:    shrl $2, %ecx
    143 ; AVX-NEXT:    vmovd %ecx, %xmm3
    144 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
    145 ; AVX-NEXT:    vpextrd $2, %xmm2, %eax
    146 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    147 ; AVX-NEXT:    shrq $32, %rcx
    148 ; AVX-NEXT:    subl %ecx, %eax
    149 ; AVX-NEXT:    shrl %eax
    150 ; AVX-NEXT:    addl %ecx, %eax
    151 ; AVX-NEXT:    shrl $2, %eax
    152 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
    153 ; AVX-NEXT:    vpextrd $3, %xmm2, %eax
    154 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    155 ; AVX-NEXT:    shrq $32, %rcx
    156 ; AVX-NEXT:    subl %ecx, %eax
    157 ; AVX-NEXT:    shrl %eax
    158 ; AVX-NEXT:    addl %ecx, %eax
    159 ; AVX-NEXT:    shrl $2, %eax
    160 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
    161 ; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
    162 ; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
    163 ; AVX-NEXT:    vpextrd $1, %xmm2, %eax
    164 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    165 ; AVX-NEXT:    shrq $32, %rcx
    166 ; AVX-NEXT:    subl %ecx, %eax
    167 ; AVX-NEXT:    shrl %eax
    168 ; AVX-NEXT:    addl %ecx, %eax
    169 ; AVX-NEXT:    shrl $2, %eax
    170 ; AVX-NEXT:    vmovd %xmm2, %ecx
    171 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
    172 ; AVX-NEXT:    shrq $32, %rdx
    173 ; AVX-NEXT:    subl %edx, %ecx
    174 ; AVX-NEXT:    shrl %ecx
    175 ; AVX-NEXT:    addl %edx, %ecx
    176 ; AVX-NEXT:    shrl $2, %ecx
    177 ; AVX-NEXT:    vmovd %ecx, %xmm3
    178 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
    179 ; AVX-NEXT:    vpextrd $2, %xmm2, %eax
    180 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    181 ; AVX-NEXT:    shrq $32, %rcx
    182 ; AVX-NEXT:    subl %ecx, %eax
    183 ; AVX-NEXT:    shrl %eax
    184 ; AVX-NEXT:    addl %ecx, %eax
    185 ; AVX-NEXT:    shrl $2, %eax
    186 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
    187 ; AVX-NEXT:    vpextrd $3, %xmm2, %eax
    188 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    189 ; AVX-NEXT:    shrq $32, %rcx
    190 ; AVX-NEXT:    subl %ecx, %eax
    191 ; AVX-NEXT:    shrl %eax
    192 ; AVX-NEXT:    addl %ecx, %eax
    193 ; AVX-NEXT:    shrl $2, %eax
    194 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
    195 ; AVX-NEXT:    vpextrd $1, %xmm0, %eax
    196 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    197 ; AVX-NEXT:    shrq $32, %rcx
    198 ; AVX-NEXT:    subl %ecx, %eax
    199 ; AVX-NEXT:    shrl %eax
    200 ; AVX-NEXT:    addl %ecx, %eax
    201 ; AVX-NEXT:    shrl $2, %eax
    202 ; AVX-NEXT:    vmovd %xmm0, %ecx
    203 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
    204 ; AVX-NEXT:    shrq $32, %rdx
    205 ; AVX-NEXT:    subl %edx, %ecx
    206 ; AVX-NEXT:    shrl %ecx
    207 ; AVX-NEXT:    addl %edx, %ecx
    208 ; AVX-NEXT:    shrl $2, %ecx
    209 ; AVX-NEXT:    vmovd %ecx, %xmm3
    210 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
    211 ; AVX-NEXT:    vpextrd $2, %xmm0, %eax
    212 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    213 ; AVX-NEXT:    shrq $32, %rcx
    214 ; AVX-NEXT:    subl %ecx, %eax
    215 ; AVX-NEXT:    shrl %eax
    216 ; AVX-NEXT:    addl %ecx, %eax
    217 ; AVX-NEXT:    shrl $2, %eax
    218 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
    219 ; AVX-NEXT:    vpextrd $3, %xmm0, %eax
    220 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
    221 ; AVX-NEXT:    shrq $32, %rcx
    222 ; AVX-NEXT:    subl %ecx, %eax
    223 ; AVX-NEXT:    shrl %eax
    224 ; AVX-NEXT:    addl %ecx, %eax
    225 ; AVX-NEXT:    shrl $2, %eax
    226 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
    227 ; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    228 ; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    229 ; AVX-NEXT:    retq
        ; IR under test: lane-wise unsigned division by a splat of 7.
    230   %res = udiv <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
    231   ret <16 x i32> %res
    232 }
    233 
    234 define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind {
        ; Codegen test: unsigned divide of all thirty-two i16 lanes of %a by
        ; the constant 7. Unlike the i64 and i32 tests, the expected output
        ; stays in vector registers and differs between the two RUN lines.
        ;
        ; Both variants use the same per-lane recipe:
        ;   hi = high 16 bits of (lane * C)        (vpmulhuw)
        ;   q  = (((lane - hi) >> 1) + hi) >> 2    (vpsubw, vpsrlw $1, vpaddw, vpsrlw $2)
        ;
        ; With -mattr=+avx512f the input is handled as two 256-bit halves
        ; (ymm0 and ymm1), sharing one splat constant C = 9363 held in ymm2.
        ; With -mattr=+avx512bw a single 512-bit sequence on zmm0 is expected,
        ; with the multiplier read from a RIP-relative memory operand (its
        ; value is not pinned by the check pattern).
        ;
        ; The check lines are autogenerated (see the note on the first line of
        ; the test); regenerate them with the script rather than editing by hand.
    235 ; AVX512F-LABEL: test_div7_32i16:
    236 ; AVX512F:       # BB#0:
    237 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363]
    238 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm3
    239 ; AVX512F-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
    240 ; AVX512F-NEXT:    vpsrlw $1, %ymm0, %ymm0
    241 ; AVX512F-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
    242 ; AVX512F-NEXT:    vpsrlw $2, %ymm0, %ymm0
    243 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm2
    244 ; AVX512F-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
    245 ; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
    246 ; AVX512F-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    247 ; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
    248 ; AVX512F-NEXT:    retq
    249 ;
    250 ; AVX512BW-LABEL: test_div7_32i16:
    251 ; AVX512BW:       # BB#0:
    252 ; AVX512BW-NEXT:    vpmulhuw {{.*}}(%rip), %zmm0, %zmm1
    253 ; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
    254 ; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm0
    255 ; AVX512BW-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
    256 ; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm0
    257 ; AVX512BW-NEXT:    retq
        ; IR under test: lane-wise unsigned division by a splat of 7.
    258   %res = udiv <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
    259   ret <32 x i16> %res
    260 }
    261 
    262 define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
    263 ; AVX512F-LABEL: test_div7_64i8:
    264 ; AVX512F:       # BB#0:
    265 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
    266 ; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
    267 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
    268 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
    269 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
    270 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm4, %ymm4
    271 ; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
    272 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    273 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    274 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm5, %ymm5
    275 ; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5
    276 ; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm5[2,3],ymm4[2,3]
    277 ; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm5, %ymm4
    278 ; AVX512F-NEXT:    vpackuswb %ymm6, %ymm4, %ymm4
    279 ; AVX512F-NEXT:    vpsubb %ymm4, %ymm0, %ymm0
    280 ; AVX512F-NEXT:    vpsrlw $1, %ymm0, %ymm0
    281 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
    282 ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
    283 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm0, %ymm0
    284 ; AVX512F-NEXT:    vpsrlw $2, %ymm0, %ymm0
    285 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
    286 ; AVX512F-NEXT:    vpand %ymm4, %ymm0, %ymm0
    287 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm6
    288 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
    289 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm6, %ymm3
    290 ; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
    291 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    292 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm6, %ymm2
    293 ; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
    294 ; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm2[2,3],ymm3[2,3]
    295 ; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
    296 ; AVX512F-NEXT:    vpackuswb %ymm6, %ymm2, %ymm2
    297 ; AVX512F-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
    298 ; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
    299 ; AVX512F-NEXT:    vpand %ymm5, %ymm1, %ymm1
    300 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
    301 ; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
    302 ; AVX512F-NEXT:    vpand %ymm4, %ymm1, %ymm1
    303 ; AVX512F-NEXT:    retq
    304 ;
    305 ; AVX512BW-LABEL: test_div7_64i8:
    306 ; AVX512BW:       # BB#0:
    307 ; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
    308 ; AVX512BW-NEXT:    vpextrb $1, %xmm1, %eax
    309 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    310 ; AVX512BW-NEXT:    shrl $8, %ecx
    311 ; AVX512BW-NEXT:    subb %cl, %al
    312 ; AVX512BW-NEXT:    shrb %al
    313 ; AVX512BW-NEXT:    addb %cl, %al
    314 ; AVX512BW-NEXT:    shrb $2, %al
    315 ; AVX512BW-NEXT:    movzbl %al, %eax
    316 ; AVX512BW-NEXT:    vpextrb $0, %xmm1, %ecx
    317 ; AVX512BW-NEXT:    imull $37, %ecx, %edx
    318 ; AVX512BW-NEXT:    shrl $8, %edx
    319 ; AVX512BW-NEXT:    subb %dl, %cl
    320 ; AVX512BW-NEXT:    shrb %cl
    321 ; AVX512BW-NEXT:    addb %dl, %cl
    322 ; AVX512BW-NEXT:    shrb $2, %cl
    323 ; AVX512BW-NEXT:    movzbl %cl, %ecx
    324 ; AVX512BW-NEXT:    vmovd %ecx, %xmm2
    325 ; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
    326 ; AVX512BW-NEXT:    vpextrb $2, %xmm1, %eax
    327 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    328 ; AVX512BW-NEXT:    shrl $8, %ecx
    329 ; AVX512BW-NEXT:    subb %cl, %al
    330 ; AVX512BW-NEXT:    shrb %al
    331 ; AVX512BW-NEXT:    addb %cl, %al
    332 ; AVX512BW-NEXT:    shrb $2, %al
    333 ; AVX512BW-NEXT:    movzbl %al, %eax
    334 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
    335 ; AVX512BW-NEXT:    vpextrb $3, %xmm1, %eax
    336 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    337 ; AVX512BW-NEXT:    shrl $8, %ecx
    338 ; AVX512BW-NEXT:    subb %cl, %al
    339 ; AVX512BW-NEXT:    shrb %al
    340 ; AVX512BW-NEXT:    addb %cl, %al
    341 ; AVX512BW-NEXT:    shrb $2, %al
    342 ; AVX512BW-NEXT:    movzbl %al, %eax
    343 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
    344 ; AVX512BW-NEXT:    vpextrb $4, %xmm1, %eax
    345 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    346 ; AVX512BW-NEXT:    shrl $8, %ecx
    347 ; AVX512BW-NEXT:    subb %cl, %al
    348 ; AVX512BW-NEXT:    shrb %al
    349 ; AVX512BW-NEXT:    addb %cl, %al
    350 ; AVX512BW-NEXT:    shrb $2, %al
    351 ; AVX512BW-NEXT:    movzbl %al, %eax
    352 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
    353 ; AVX512BW-NEXT:    vpextrb $5, %xmm1, %eax
    354 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    355 ; AVX512BW-NEXT:    shrl $8, %ecx
    356 ; AVX512BW-NEXT:    subb %cl, %al
    357 ; AVX512BW-NEXT:    shrb %al
    358 ; AVX512BW-NEXT:    addb %cl, %al
    359 ; AVX512BW-NEXT:    shrb $2, %al
    360 ; AVX512BW-NEXT:    movzbl %al, %eax
    361 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
    362 ; AVX512BW-NEXT:    vpextrb $6, %xmm1, %eax
    363 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    364 ; AVX512BW-NEXT:    shrl $8, %ecx
    365 ; AVX512BW-NEXT:    subb %cl, %al
    366 ; AVX512BW-NEXT:    shrb %al
    367 ; AVX512BW-NEXT:    addb %cl, %al
    368 ; AVX512BW-NEXT:    shrb $2, %al
    369 ; AVX512BW-NEXT:    movzbl %al, %eax
    370 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
    371 ; AVX512BW-NEXT:    vpextrb $7, %xmm1, %eax
    372 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    373 ; AVX512BW-NEXT:    shrl $8, %ecx
    374 ; AVX512BW-NEXT:    subb %cl, %al
    375 ; AVX512BW-NEXT:    shrb %al
    376 ; AVX512BW-NEXT:    addb %cl, %al
    377 ; AVX512BW-NEXT:    shrb $2, %al
    378 ; AVX512BW-NEXT:    movzbl %al, %eax
    379 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
    380 ; AVX512BW-NEXT:    vpextrb $8, %xmm1, %eax
    381 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    382 ; AVX512BW-NEXT:    shrl $8, %ecx
    383 ; AVX512BW-NEXT:    subb %cl, %al
    384 ; AVX512BW-NEXT:    shrb %al
    385 ; AVX512BW-NEXT:    addb %cl, %al
    386 ; AVX512BW-NEXT:    shrb $2, %al
    387 ; AVX512BW-NEXT:    movzbl %al, %eax
    388 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
    389 ; AVX512BW-NEXT:    vpextrb $9, %xmm1, %eax
    390 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    391 ; AVX512BW-NEXT:    shrl $8, %ecx
    392 ; AVX512BW-NEXT:    subb %cl, %al
    393 ; AVX512BW-NEXT:    shrb %al
    394 ; AVX512BW-NEXT:    addb %cl, %al
    395 ; AVX512BW-NEXT:    shrb $2, %al
    396 ; AVX512BW-NEXT:    movzbl %al, %eax
    397 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
    398 ; AVX512BW-NEXT:    vpextrb $10, %xmm1, %eax
    399 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    400 ; AVX512BW-NEXT:    shrl $8, %ecx
    401 ; AVX512BW-NEXT:    subb %cl, %al
    402 ; AVX512BW-NEXT:    shrb %al
    403 ; AVX512BW-NEXT:    addb %cl, %al
    404 ; AVX512BW-NEXT:    shrb $2, %al
    405 ; AVX512BW-NEXT:    movzbl %al, %eax
    406 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
    407 ; AVX512BW-NEXT:    vpextrb $11, %xmm1, %eax
    408 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    409 ; AVX512BW-NEXT:    shrl $8, %ecx
    410 ; AVX512BW-NEXT:    subb %cl, %al
    411 ; AVX512BW-NEXT:    shrb %al
    412 ; AVX512BW-NEXT:    addb %cl, %al
    413 ; AVX512BW-NEXT:    shrb $2, %al
    414 ; AVX512BW-NEXT:    movzbl %al, %eax
    415 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
    416 ; AVX512BW-NEXT:    vpextrb $12, %xmm1, %eax
    417 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    418 ; AVX512BW-NEXT:    shrl $8, %ecx
    419 ; AVX512BW-NEXT:    subb %cl, %al
    420 ; AVX512BW-NEXT:    shrb %al
    421 ; AVX512BW-NEXT:    addb %cl, %al
    422 ; AVX512BW-NEXT:    shrb $2, %al
    423 ; AVX512BW-NEXT:    movzbl %al, %eax
    424 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
    425 ; AVX512BW-NEXT:    vpextrb $13, %xmm1, %eax
    426 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    427 ; AVX512BW-NEXT:    shrl $8, %ecx
    428 ; AVX512BW-NEXT:    subb %cl, %al
    429 ; AVX512BW-NEXT:    shrb %al
    430 ; AVX512BW-NEXT:    addb %cl, %al
    431 ; AVX512BW-NEXT:    shrb $2, %al
    432 ; AVX512BW-NEXT:    movzbl %al, %eax
    433 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
    434 ; AVX512BW-NEXT:    vpextrb $14, %xmm1, %eax
    435 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    436 ; AVX512BW-NEXT:    shrl $8, %ecx
    437 ; AVX512BW-NEXT:    subb %cl, %al
    438 ; AVX512BW-NEXT:    shrb %al
    439 ; AVX512BW-NEXT:    addb %cl, %al
    440 ; AVX512BW-NEXT:    shrb $2, %al
    441 ; AVX512BW-NEXT:    movzbl %al, %eax
    442 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
    443 ; AVX512BW-NEXT:    vpextrb $15, %xmm1, %eax
    444 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    445 ; AVX512BW-NEXT:    shrl $8, %ecx
    446 ; AVX512BW-NEXT:    subb %cl, %al
    447 ; AVX512BW-NEXT:    shrb %al
    448 ; AVX512BW-NEXT:    addb %cl, %al
    449 ; AVX512BW-NEXT:    shrb $2, %al
    450 ; AVX512BW-NEXT:    movzbl %al, %eax
    451 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
    452 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
    453 ; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
    454 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    455 ; AVX512BW-NEXT:    shrl $8, %ecx
    456 ; AVX512BW-NEXT:    subb %cl, %al
    457 ; AVX512BW-NEXT:    shrb %al
    458 ; AVX512BW-NEXT:    addb %cl, %al
    459 ; AVX512BW-NEXT:    shrb $2, %al
    460 ; AVX512BW-NEXT:    movzbl %al, %eax
    461 ; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
    462 ; AVX512BW-NEXT:    imull $37, %ecx, %edx
    463 ; AVX512BW-NEXT:    shrl $8, %edx
    464 ; AVX512BW-NEXT:    subb %dl, %cl
    465 ; AVX512BW-NEXT:    shrb %cl
    466 ; AVX512BW-NEXT:    addb %dl, %cl
    467 ; AVX512BW-NEXT:    shrb $2, %cl
    468 ; AVX512BW-NEXT:    movzbl %cl, %ecx
    469 ; AVX512BW-NEXT:    vmovd %ecx, %xmm3
    470 ; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
    471 ; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
    472 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    473 ; AVX512BW-NEXT:    shrl $8, %ecx
    474 ; AVX512BW-NEXT:    subb %cl, %al
    475 ; AVX512BW-NEXT:    shrb %al
    476 ; AVX512BW-NEXT:    addb %cl, %al
    477 ; AVX512BW-NEXT:    shrb $2, %al
    478 ; AVX512BW-NEXT:    movzbl %al, %eax
    479 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
    480 ; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
    481 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    482 ; AVX512BW-NEXT:    shrl $8, %ecx
    483 ; AVX512BW-NEXT:    subb %cl, %al
    484 ; AVX512BW-NEXT:    shrb %al
    485 ; AVX512BW-NEXT:    addb %cl, %al
    486 ; AVX512BW-NEXT:    shrb $2, %al
    487 ; AVX512BW-NEXT:    movzbl %al, %eax
    488 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
    489 ; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
    490 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    491 ; AVX512BW-NEXT:    shrl $8, %ecx
    492 ; AVX512BW-NEXT:    subb %cl, %al
    493 ; AVX512BW-NEXT:    shrb %al
    494 ; AVX512BW-NEXT:    addb %cl, %al
    495 ; AVX512BW-NEXT:    shrb $2, %al
    496 ; AVX512BW-NEXT:    movzbl %al, %eax
    497 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
    498 ; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
    499 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    500 ; AVX512BW-NEXT:    shrl $8, %ecx
    501 ; AVX512BW-NEXT:    subb %cl, %al
    502 ; AVX512BW-NEXT:    shrb %al
    503 ; AVX512BW-NEXT:    addb %cl, %al
    504 ; AVX512BW-NEXT:    shrb $2, %al
    505 ; AVX512BW-NEXT:    movzbl %al, %eax
    506 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
    507 ; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
    508 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    509 ; AVX512BW-NEXT:    shrl $8, %ecx
    510 ; AVX512BW-NEXT:    subb %cl, %al
    511 ; AVX512BW-NEXT:    shrb %al
    512 ; AVX512BW-NEXT:    addb %cl, %al
    513 ; AVX512BW-NEXT:    shrb $2, %al
    514 ; AVX512BW-NEXT:    movzbl %al, %eax
    515 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
    516 ; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
    517 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    518 ; AVX512BW-NEXT:    shrl $8, %ecx
    519 ; AVX512BW-NEXT:    subb %cl, %al
    520 ; AVX512BW-NEXT:    shrb %al
    521 ; AVX512BW-NEXT:    addb %cl, %al
    522 ; AVX512BW-NEXT:    shrb $2, %al
    523 ; AVX512BW-NEXT:    movzbl %al, %eax
    524 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
    525 ; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
    526 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    527 ; AVX512BW-NEXT:    shrl $8, %ecx
    528 ; AVX512BW-NEXT:    subb %cl, %al
    529 ; AVX512BW-NEXT:    shrb %al
    530 ; AVX512BW-NEXT:    addb %cl, %al
    531 ; AVX512BW-NEXT:    shrb $2, %al
    532 ; AVX512BW-NEXT:    movzbl %al, %eax
    533 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
    534 ; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
    535 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    536 ; AVX512BW-NEXT:    shrl $8, %ecx
    537 ; AVX512BW-NEXT:    subb %cl, %al
    538 ; AVX512BW-NEXT:    shrb %al
    539 ; AVX512BW-NEXT:    addb %cl, %al
    540 ; AVX512BW-NEXT:    shrb $2, %al
    541 ; AVX512BW-NEXT:    movzbl %al, %eax
    542 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
    543 ; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
    544 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    545 ; AVX512BW-NEXT:    shrl $8, %ecx
    546 ; AVX512BW-NEXT:    subb %cl, %al
    547 ; AVX512BW-NEXT:    shrb %al
    548 ; AVX512BW-NEXT:    addb %cl, %al
    549 ; AVX512BW-NEXT:    shrb $2, %al
    550 ; AVX512BW-NEXT:    movzbl %al, %eax
    551 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
    552 ; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
    553 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    554 ; AVX512BW-NEXT:    shrl $8, %ecx
    555 ; AVX512BW-NEXT:    subb %cl, %al
    556 ; AVX512BW-NEXT:    shrb %al
    557 ; AVX512BW-NEXT:    addb %cl, %al
    558 ; AVX512BW-NEXT:    shrb $2, %al
    559 ; AVX512BW-NEXT:    movzbl %al, %eax
    560 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
    561 ; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
    562 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    563 ; AVX512BW-NEXT:    shrl $8, %ecx
    564 ; AVX512BW-NEXT:    subb %cl, %al
    565 ; AVX512BW-NEXT:    shrb %al
    566 ; AVX512BW-NEXT:    addb %cl, %al
    567 ; AVX512BW-NEXT:    shrb $2, %al
    568 ; AVX512BW-NEXT:    movzbl %al, %eax
    569 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
    570 ; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
    571 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    572 ; AVX512BW-NEXT:    shrl $8, %ecx
    573 ; AVX512BW-NEXT:    subb %cl, %al
    574 ; AVX512BW-NEXT:    shrb %al
    575 ; AVX512BW-NEXT:    addb %cl, %al
    576 ; AVX512BW-NEXT:    shrb $2, %al
    577 ; AVX512BW-NEXT:    movzbl %al, %eax
    578 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
    579 ; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
    580 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    581 ; AVX512BW-NEXT:    shrl $8, %ecx
    582 ; AVX512BW-NEXT:    subb %cl, %al
    583 ; AVX512BW-NEXT:    shrb %al
    584 ; AVX512BW-NEXT:    addb %cl, %al
    585 ; AVX512BW-NEXT:    shrb $2, %al
    586 ; AVX512BW-NEXT:    movzbl %al, %eax
    587 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
    588 ; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
    589 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    590 ; AVX512BW-NEXT:    shrl $8, %ecx
    591 ; AVX512BW-NEXT:    subb %cl, %al
    592 ; AVX512BW-NEXT:    shrb %al
    593 ; AVX512BW-NEXT:    addb %cl, %al
    594 ; AVX512BW-NEXT:    shrb $2, %al
    595 ; AVX512BW-NEXT:    movzbl %al, %eax
    596 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
    597 ; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
    598 ; AVX512BW-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
    599 ; AVX512BW-NEXT:    vpextrb $1, %xmm2, %eax
    600 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    601 ; AVX512BW-NEXT:    shrl $8, %ecx
    602 ; AVX512BW-NEXT:    subb %cl, %al
    603 ; AVX512BW-NEXT:    shrb %al
    604 ; AVX512BW-NEXT:    addb %cl, %al
    605 ; AVX512BW-NEXT:    shrb $2, %al
    606 ; AVX512BW-NEXT:    movzbl %al, %eax
    607 ; AVX512BW-NEXT:    vpextrb $0, %xmm2, %ecx
    608 ; AVX512BW-NEXT:    imull $37, %ecx, %edx
    609 ; AVX512BW-NEXT:    shrl $8, %edx
    610 ; AVX512BW-NEXT:    subb %dl, %cl
    611 ; AVX512BW-NEXT:    shrb %cl
    612 ; AVX512BW-NEXT:    addb %dl, %cl
    613 ; AVX512BW-NEXT:    shrb $2, %cl
    614 ; AVX512BW-NEXT:    movzbl %cl, %ecx
    615 ; AVX512BW-NEXT:    vmovd %ecx, %xmm3
    616 ; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
    617 ; AVX512BW-NEXT:    vpextrb $2, %xmm2, %eax
    618 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    619 ; AVX512BW-NEXT:    shrl $8, %ecx
    620 ; AVX512BW-NEXT:    subb %cl, %al
    621 ; AVX512BW-NEXT:    shrb %al
    622 ; AVX512BW-NEXT:    addb %cl, %al
    623 ; AVX512BW-NEXT:    shrb $2, %al
    624 ; AVX512BW-NEXT:    movzbl %al, %eax
    625 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
    626 ; AVX512BW-NEXT:    vpextrb $3, %xmm2, %eax
    627 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    628 ; AVX512BW-NEXT:    shrl $8, %ecx
    629 ; AVX512BW-NEXT:    subb %cl, %al
    630 ; AVX512BW-NEXT:    shrb %al
    631 ; AVX512BW-NEXT:    addb %cl, %al
    632 ; AVX512BW-NEXT:    shrb $2, %al
    633 ; AVX512BW-NEXT:    movzbl %al, %eax
    634 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
    635 ; AVX512BW-NEXT:    vpextrb $4, %xmm2, %eax
    636 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    637 ; AVX512BW-NEXT:    shrl $8, %ecx
    638 ; AVX512BW-NEXT:    subb %cl, %al
    639 ; AVX512BW-NEXT:    shrb %al
    640 ; AVX512BW-NEXT:    addb %cl, %al
    641 ; AVX512BW-NEXT:    shrb $2, %al
    642 ; AVX512BW-NEXT:    movzbl %al, %eax
    643 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
    644 ; AVX512BW-NEXT:    vpextrb $5, %xmm2, %eax
    645 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    646 ; AVX512BW-NEXT:    shrl $8, %ecx
    647 ; AVX512BW-NEXT:    subb %cl, %al
    648 ; AVX512BW-NEXT:    shrb %al
    649 ; AVX512BW-NEXT:    addb %cl, %al
    650 ; AVX512BW-NEXT:    shrb $2, %al
    651 ; AVX512BW-NEXT:    movzbl %al, %eax
    652 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
    653 ; AVX512BW-NEXT:    vpextrb $6, %xmm2, %eax
    654 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    655 ; AVX512BW-NEXT:    shrl $8, %ecx
    656 ; AVX512BW-NEXT:    subb %cl, %al
    657 ; AVX512BW-NEXT:    shrb %al
    658 ; AVX512BW-NEXT:    addb %cl, %al
    659 ; AVX512BW-NEXT:    shrb $2, %al
    660 ; AVX512BW-NEXT:    movzbl %al, %eax
    661 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
    662 ; AVX512BW-NEXT:    vpextrb $7, %xmm2, %eax
    663 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    664 ; AVX512BW-NEXT:    shrl $8, %ecx
    665 ; AVX512BW-NEXT:    subb %cl, %al
    666 ; AVX512BW-NEXT:    shrb %al
    667 ; AVX512BW-NEXT:    addb %cl, %al
    668 ; AVX512BW-NEXT:    shrb $2, %al
    669 ; AVX512BW-NEXT:    movzbl %al, %eax
    670 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
    671 ; AVX512BW-NEXT:    vpextrb $8, %xmm2, %eax
    672 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    673 ; AVX512BW-NEXT:    shrl $8, %ecx
    674 ; AVX512BW-NEXT:    subb %cl, %al
    675 ; AVX512BW-NEXT:    shrb %al
    676 ; AVX512BW-NEXT:    addb %cl, %al
    677 ; AVX512BW-NEXT:    shrb $2, %al
    678 ; AVX512BW-NEXT:    movzbl %al, %eax
    679 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
    680 ; AVX512BW-NEXT:    vpextrb $9, %xmm2, %eax
    681 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    682 ; AVX512BW-NEXT:    shrl $8, %ecx
    683 ; AVX512BW-NEXT:    subb %cl, %al
    684 ; AVX512BW-NEXT:    shrb %al
    685 ; AVX512BW-NEXT:    addb %cl, %al
    686 ; AVX512BW-NEXT:    shrb $2, %al
    687 ; AVX512BW-NEXT:    movzbl %al, %eax
    688 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
    689 ; AVX512BW-NEXT:    vpextrb $10, %xmm2, %eax
    690 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    691 ; AVX512BW-NEXT:    shrl $8, %ecx
    692 ; AVX512BW-NEXT:    subb %cl, %al
    693 ; AVX512BW-NEXT:    shrb %al
    694 ; AVX512BW-NEXT:    addb %cl, %al
    695 ; AVX512BW-NEXT:    shrb $2, %al
    696 ; AVX512BW-NEXT:    movzbl %al, %eax
    697 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
    698 ; AVX512BW-NEXT:    vpextrb $11, %xmm2, %eax
    699 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    700 ; AVX512BW-NEXT:    shrl $8, %ecx
    701 ; AVX512BW-NEXT:    subb %cl, %al
    702 ; AVX512BW-NEXT:    shrb %al
    703 ; AVX512BW-NEXT:    addb %cl, %al
    704 ; AVX512BW-NEXT:    shrb $2, %al
    705 ; AVX512BW-NEXT:    movzbl %al, %eax
    706 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
    707 ; AVX512BW-NEXT:    vpextrb $12, %xmm2, %eax
    708 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    709 ; AVX512BW-NEXT:    shrl $8, %ecx
    710 ; AVX512BW-NEXT:    subb %cl, %al
    711 ; AVX512BW-NEXT:    shrb %al
    712 ; AVX512BW-NEXT:    addb %cl, %al
    713 ; AVX512BW-NEXT:    shrb $2, %al
    714 ; AVX512BW-NEXT:    movzbl %al, %eax
    715 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
    716 ; AVX512BW-NEXT:    vpextrb $13, %xmm2, %eax
    717 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    718 ; AVX512BW-NEXT:    shrl $8, %ecx
    719 ; AVX512BW-NEXT:    subb %cl, %al
    720 ; AVX512BW-NEXT:    shrb %al
    721 ; AVX512BW-NEXT:    addb %cl, %al
    722 ; AVX512BW-NEXT:    shrb $2, %al
    723 ; AVX512BW-NEXT:    movzbl %al, %eax
    724 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
    725 ; AVX512BW-NEXT:    vpextrb $14, %xmm2, %eax
    726 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    727 ; AVX512BW-NEXT:    shrl $8, %ecx
    728 ; AVX512BW-NEXT:    subb %cl, %al
    729 ; AVX512BW-NEXT:    shrb %al
    730 ; AVX512BW-NEXT:    addb %cl, %al
    731 ; AVX512BW-NEXT:    shrb $2, %al
    732 ; AVX512BW-NEXT:    movzbl %al, %eax
    733 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
    734 ; AVX512BW-NEXT:    vpextrb $15, %xmm2, %eax
    735 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    736 ; AVX512BW-NEXT:    shrl $8, %ecx
    737 ; AVX512BW-NEXT:    subb %cl, %al
    738 ; AVX512BW-NEXT:    shrb %al
    739 ; AVX512BW-NEXT:    addb %cl, %al
    740 ; AVX512BW-NEXT:    shrb $2, %al
    741 ; AVX512BW-NEXT:    movzbl %al, %eax
    742 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
    743 ; AVX512BW-NEXT:    vpextrb $1, %xmm0, %eax
    744 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    745 ; AVX512BW-NEXT:    shrl $8, %ecx
    746 ; AVX512BW-NEXT:    subb %cl, %al
    747 ; AVX512BW-NEXT:    shrb %al
    748 ; AVX512BW-NEXT:    addb %cl, %al
    749 ; AVX512BW-NEXT:    shrb $2, %al
    750 ; AVX512BW-NEXT:    movzbl %al, %eax
    751 ; AVX512BW-NEXT:    vpextrb $0, %xmm0, %ecx
    752 ; AVX512BW-NEXT:    imull $37, %ecx, %edx
    753 ; AVX512BW-NEXT:    shrl $8, %edx
    754 ; AVX512BW-NEXT:    subb %dl, %cl
    755 ; AVX512BW-NEXT:    shrb %cl
    756 ; AVX512BW-NEXT:    addb %dl, %cl
    757 ; AVX512BW-NEXT:    shrb $2, %cl
    758 ; AVX512BW-NEXT:    movzbl %cl, %ecx
    759 ; AVX512BW-NEXT:    vmovd %ecx, %xmm3
    760 ; AVX512BW-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
    761 ; AVX512BW-NEXT:    vpextrb $2, %xmm0, %eax
    762 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    763 ; AVX512BW-NEXT:    shrl $8, %ecx
    764 ; AVX512BW-NEXT:    subb %cl, %al
    765 ; AVX512BW-NEXT:    shrb %al
    766 ; AVX512BW-NEXT:    addb %cl, %al
    767 ; AVX512BW-NEXT:    shrb $2, %al
    768 ; AVX512BW-NEXT:    movzbl %al, %eax
    769 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
    770 ; AVX512BW-NEXT:    vpextrb $3, %xmm0, %eax
    771 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    772 ; AVX512BW-NEXT:    shrl $8, %ecx
    773 ; AVX512BW-NEXT:    subb %cl, %al
    774 ; AVX512BW-NEXT:    shrb %al
    775 ; AVX512BW-NEXT:    addb %cl, %al
    776 ; AVX512BW-NEXT:    shrb $2, %al
    777 ; AVX512BW-NEXT:    movzbl %al, %eax
    778 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
    779 ; AVX512BW-NEXT:    vpextrb $4, %xmm0, %eax
    780 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    781 ; AVX512BW-NEXT:    shrl $8, %ecx
    782 ; AVX512BW-NEXT:    subb %cl, %al
    783 ; AVX512BW-NEXT:    shrb %al
    784 ; AVX512BW-NEXT:    addb %cl, %al
    785 ; AVX512BW-NEXT:    shrb $2, %al
    786 ; AVX512BW-NEXT:    movzbl %al, %eax
    787 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
    788 ; AVX512BW-NEXT:    vpextrb $5, %xmm0, %eax
    789 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    790 ; AVX512BW-NEXT:    shrl $8, %ecx
    791 ; AVX512BW-NEXT:    subb %cl, %al
    792 ; AVX512BW-NEXT:    shrb %al
    793 ; AVX512BW-NEXT:    addb %cl, %al
    794 ; AVX512BW-NEXT:    shrb $2, %al
    795 ; AVX512BW-NEXT:    movzbl %al, %eax
    796 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
    797 ; AVX512BW-NEXT:    vpextrb $6, %xmm0, %eax
    798 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    799 ; AVX512BW-NEXT:    shrl $8, %ecx
    800 ; AVX512BW-NEXT:    subb %cl, %al
    801 ; AVX512BW-NEXT:    shrb %al
    802 ; AVX512BW-NEXT:    addb %cl, %al
    803 ; AVX512BW-NEXT:    shrb $2, %al
    804 ; AVX512BW-NEXT:    movzbl %al, %eax
    805 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
    806 ; AVX512BW-NEXT:    vpextrb $7, %xmm0, %eax
    807 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    808 ; AVX512BW-NEXT:    shrl $8, %ecx
    809 ; AVX512BW-NEXT:    subb %cl, %al
    810 ; AVX512BW-NEXT:    shrb %al
    811 ; AVX512BW-NEXT:    addb %cl, %al
    812 ; AVX512BW-NEXT:    shrb $2, %al
    813 ; AVX512BW-NEXT:    movzbl %al, %eax
    814 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
    815 ; AVX512BW-NEXT:    vpextrb $8, %xmm0, %eax
    816 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    817 ; AVX512BW-NEXT:    shrl $8, %ecx
    818 ; AVX512BW-NEXT:    subb %cl, %al
    819 ; AVX512BW-NEXT:    shrb %al
    820 ; AVX512BW-NEXT:    addb %cl, %al
    821 ; AVX512BW-NEXT:    shrb $2, %al
    822 ; AVX512BW-NEXT:    movzbl %al, %eax
    823 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
    824 ; AVX512BW-NEXT:    vpextrb $9, %xmm0, %eax
    825 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    826 ; AVX512BW-NEXT:    shrl $8, %ecx
    827 ; AVX512BW-NEXT:    subb %cl, %al
    828 ; AVX512BW-NEXT:    shrb %al
    829 ; AVX512BW-NEXT:    addb %cl, %al
    830 ; AVX512BW-NEXT:    shrb $2, %al
    831 ; AVX512BW-NEXT:    movzbl %al, %eax
    832 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
    833 ; AVX512BW-NEXT:    vpextrb $10, %xmm0, %eax
    834 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    835 ; AVX512BW-NEXT:    shrl $8, %ecx
    836 ; AVX512BW-NEXT:    subb %cl, %al
    837 ; AVX512BW-NEXT:    shrb %al
    838 ; AVX512BW-NEXT:    addb %cl, %al
    839 ; AVX512BW-NEXT:    shrb $2, %al
    840 ; AVX512BW-NEXT:    movzbl %al, %eax
    841 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
    842 ; AVX512BW-NEXT:    vpextrb $11, %xmm0, %eax
    843 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    844 ; AVX512BW-NEXT:    shrl $8, %ecx
    845 ; AVX512BW-NEXT:    subb %cl, %al
    846 ; AVX512BW-NEXT:    shrb %al
    847 ; AVX512BW-NEXT:    addb %cl, %al
    848 ; AVX512BW-NEXT:    shrb $2, %al
    849 ; AVX512BW-NEXT:    movzbl %al, %eax
    850 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
    851 ; AVX512BW-NEXT:    vpextrb $12, %xmm0, %eax
    852 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    853 ; AVX512BW-NEXT:    shrl $8, %ecx
    854 ; AVX512BW-NEXT:    subb %cl, %al
    855 ; AVX512BW-NEXT:    shrb %al
    856 ; AVX512BW-NEXT:    addb %cl, %al
    857 ; AVX512BW-NEXT:    shrb $2, %al
    858 ; AVX512BW-NEXT:    movzbl %al, %eax
    859 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
    860 ; AVX512BW-NEXT:    vpextrb $13, %xmm0, %eax
    861 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    862 ; AVX512BW-NEXT:    shrl $8, %ecx
    863 ; AVX512BW-NEXT:    subb %cl, %al
    864 ; AVX512BW-NEXT:    shrb %al
    865 ; AVX512BW-NEXT:    addb %cl, %al
    866 ; AVX512BW-NEXT:    shrb $2, %al
    867 ; AVX512BW-NEXT:    movzbl %al, %eax
    868 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
    869 ; AVX512BW-NEXT:    vpextrb $14, %xmm0, %eax
    870 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    871 ; AVX512BW-NEXT:    shrl $8, %ecx
    872 ; AVX512BW-NEXT:    subb %cl, %al
    873 ; AVX512BW-NEXT:    shrb %al
    874 ; AVX512BW-NEXT:    addb %cl, %al
    875 ; AVX512BW-NEXT:    shrb $2, %al
    876 ; AVX512BW-NEXT:    movzbl %al, %eax
    877 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
    878 ; AVX512BW-NEXT:    vpextrb $15, %xmm0, %eax
    879 ; AVX512BW-NEXT:    imull $37, %eax, %ecx
    880 ; AVX512BW-NEXT:    shrl $8, %ecx
    881 ; AVX512BW-NEXT:    subb %cl, %al
    882 ; AVX512BW-NEXT:    shrb %al
    883 ; AVX512BW-NEXT:    addb %cl, %al
    884 ; AVX512BW-NEXT:    shrb $2, %al
    885 ; AVX512BW-NEXT:    movzbl %al, %eax
    886 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
    887 ; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    888 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    889 ; AVX512BW-NEXT:    retq
    890   %res = udiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
    891   ret <64 x i8> %res
    892 }
    893 
    894 ;
    895 ; urem by 7
    896 ;
    897 
    ; test_rem7_8i64: unsigned remainder of every lane of an <8 x i64> vector by
    ; the constant 7. Both llc invocations at the top of the file share the AVX
    ; check prefix here, so +avx512f and +avx512bw must emit identical code.
    ; The expected code is fully scalarized, one lane at a time:
    ;   - the lane is moved to a GPR (vpextrq / vmovq);
    ;   - mulq by 0x2492492492492493 leaves the high product half in rdx, and
    ;     the sub / shr 1 / add / shr 2 sequence turns it into the quotient q;
    ;   - the remainder is formed as n - (8*q - q) using leaq and two subq.
    ; The eight results are repacked with vpunpcklqdq, vinserti128 and
    ; vinserti64x4. The check lines below are autogenerated (see the note on the
    ; first line of the file); regenerate them instead of editing by hand.
    898 define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind {
    899 ; AVX-LABEL: test_rem7_8i64:
    900 ; AVX:       # BB#0:
    901 ; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
    902 ; AVX-NEXT:    vpextrq $1, %xmm1, %rcx
    903 ; AVX-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
    904 ; AVX-NEXT:    movq %rcx, %rax
    905 ; AVX-NEXT:    mulq %rsi
    906 ; AVX-NEXT:    movq %rcx, %rax
    907 ; AVX-NEXT:    subq %rdx, %rax
    908 ; AVX-NEXT:    shrq %rax
    909 ; AVX-NEXT:    addq %rdx, %rax
    910 ; AVX-NEXT:    shrq $2, %rax
    911 ; AVX-NEXT:    leaq (,%rax,8), %rdx
    912 ; AVX-NEXT:    subq %rax, %rdx
    913 ; AVX-NEXT:    subq %rdx, %rcx
    914 ; AVX-NEXT:    vmovq %rcx, %xmm2
    915 ; AVX-NEXT:    vmovq %xmm1, %rcx
    916 ; AVX-NEXT:    movq %rcx, %rax
    917 ; AVX-NEXT:    mulq %rsi
    918 ; AVX-NEXT:    movq %rcx, %rax
    919 ; AVX-NEXT:    subq %rdx, %rax
    920 ; AVX-NEXT:    shrq %rax
    921 ; AVX-NEXT:    addq %rdx, %rax
    922 ; AVX-NEXT:    shrq $2, %rax
    923 ; AVX-NEXT:    leaq (,%rax,8), %rdx
    924 ; AVX-NEXT:    subq %rax, %rdx
    925 ; AVX-NEXT:    subq %rdx, %rcx
    926 ; AVX-NEXT:    vmovq %rcx, %xmm1
    927 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
    928 ; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
    929 ; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
    930 ; AVX-NEXT:    movq %rcx, %rax
    931 ; AVX-NEXT:    mulq %rsi
    932 ; AVX-NEXT:    movq %rcx, %rax
    933 ; AVX-NEXT:    subq %rdx, %rax
    934 ; AVX-NEXT:    shrq %rax
    935 ; AVX-NEXT:    addq %rdx, %rax
    936 ; AVX-NEXT:    shrq $2, %rax
    937 ; AVX-NEXT:    leaq (,%rax,8), %rdx
    938 ; AVX-NEXT:    subq %rax, %rdx
    939 ; AVX-NEXT:    subq %rdx, %rcx
    940 ; AVX-NEXT:    vmovq %rcx, %xmm3
    941 ; AVX-NEXT:    vmovq %xmm2, %rcx
    942 ; AVX-NEXT:    movq %rcx, %rax
    943 ; AVX-NEXT:    mulq %rsi
    944 ; AVX-NEXT:    movq %rcx, %rax
    945 ; AVX-NEXT:    subq %rdx, %rax
    946 ; AVX-NEXT:    shrq %rax
    947 ; AVX-NEXT:    addq %rdx, %rax
    948 ; AVX-NEXT:    shrq $2, %rax
    949 ; AVX-NEXT:    leaq (,%rax,8), %rdx
    950 ; AVX-NEXT:    subq %rax, %rdx
    951 ; AVX-NEXT:    subq %rdx, %rcx
    952 ; AVX-NEXT:    vmovq %rcx, %xmm2
    953 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
    954 ; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
    955 ; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
    956 ; AVX-NEXT:    vpextrq $1, %xmm2, %rcx
    957 ; AVX-NEXT:    movq %rcx, %rax
    958 ; AVX-NEXT:    mulq %rsi
    959 ; AVX-NEXT:    movq %rcx, %rax
    960 ; AVX-NEXT:    subq %rdx, %rax
    961 ; AVX-NEXT:    shrq %rax
    962 ; AVX-NEXT:    addq %rdx, %rax
    963 ; AVX-NEXT:    shrq $2, %rax
    964 ; AVX-NEXT:    leaq (,%rax,8), %rdx
    965 ; AVX-NEXT:    subq %rax, %rdx
    966 ; AVX-NEXT:    subq %rdx, %rcx
    967 ; AVX-NEXT:    vmovq %rcx, %xmm3
    968 ; AVX-NEXT:    vmovq %xmm2, %rcx
    969 ; AVX-NEXT:    movq %rcx, %rax
    970 ; AVX-NEXT:    mulq %rsi
    971 ; AVX-NEXT:    movq %rcx, %rax
    972 ; AVX-NEXT:    subq %rdx, %rax
    973 ; AVX-NEXT:    shrq %rax
    974 ; AVX-NEXT:    addq %rdx, %rax
    975 ; AVX-NEXT:    shrq $2, %rax
    976 ; AVX-NEXT:    leaq (,%rax,8), %rdx
    977 ; AVX-NEXT:    subq %rax, %rdx
    978 ; AVX-NEXT:    subq %rdx, %rcx
    979 ; AVX-NEXT:    vmovq %rcx, %xmm2
    980 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
    981 ; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
    982 ; AVX-NEXT:    movq %rcx, %rax
    983 ; AVX-NEXT:    mulq %rsi
    984 ; AVX-NEXT:    movq %rcx, %rax
    985 ; AVX-NEXT:    subq %rdx, %rax
    986 ; AVX-NEXT:    shrq %rax
    987 ; AVX-NEXT:    addq %rdx, %rax
    988 ; AVX-NEXT:    shrq $2, %rax
    989 ; AVX-NEXT:    leaq (,%rax,8), %rdx
    990 ; AVX-NEXT:    subq %rax, %rdx
    991 ; AVX-NEXT:    subq %rdx, %rcx
    992 ; AVX-NEXT:    vmovq %rcx, %xmm3
    993 ; AVX-NEXT:    vmovq %xmm0, %rcx
    994 ; AVX-NEXT:    movq %rcx, %rax
    995 ; AVX-NEXT:    mulq %rsi
    996 ; AVX-NEXT:    movq %rcx, %rax
    997 ; AVX-NEXT:    subq %rdx, %rax
    998 ; AVX-NEXT:    shrq %rax
    999 ; AVX-NEXT:    addq %rdx, %rax
   1000 ; AVX-NEXT:    shrq $2, %rax
   1001 ; AVX-NEXT:    leaq (,%rax,8), %rdx
   1002 ; AVX-NEXT:    subq %rax, %rdx
   1003 ; AVX-NEXT:    subq %rdx, %rcx
   1004 ; AVX-NEXT:    vmovq %rcx, %xmm0
   1005 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
   1006 ; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1007 ; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
   1008 ; AVX-NEXT:    retq
   1009   %res = urem <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   1010   ret <8 x i64> %res
   1011 }
   1012 
   ; test_rem7_16i32: unsigned remainder of every lane of a <16 x i32> vector
   ; by the constant 7. As with the i64 variant, a single shared AVX check
   ; prefix is used, so both llc invocations must emit identical code.
   ; The expected code is scalarized, one lane at a time:
   ;   - the lane is moved to a 32-bit GPR (vpextrd / vmovd);
   ;   - a 64-bit imulq by 0x24924925 followed by shrq $32 yields the high half
   ;     of the product, and sub / shr 1 / add / shr 2 turns it into the
   ;     quotient q;
   ;   - the remainder is formed as n - (8*q - q) using leal and two subl.
   ; Each group of four results is rebuilt with vmovd + vpinsrd, and the four
   ; 128-bit pieces are merged with vinserti128 and vinserti64x4. The check
   ; lines below are autogenerated (see the note on the first line of the
   ; file); regenerate them instead of editing by hand.
   1013 define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
   1014 ; AVX-LABEL: test_rem7_16i32:
   1015 ; AVX:       # BB#0:
   1016 ; AVX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
   1017 ; AVX-NEXT:    vpextrd $1, %xmm1, %eax
   1018 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1019 ; AVX-NEXT:    shrq $32, %rcx
   1020 ; AVX-NEXT:    movl %eax, %edx
   1021 ; AVX-NEXT:    subl %ecx, %edx
   1022 ; AVX-NEXT:    shrl %edx
   1023 ; AVX-NEXT:    addl %ecx, %edx
   1024 ; AVX-NEXT:    shrl $2, %edx
   1025 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1026 ; AVX-NEXT:    subl %edx, %ecx
   1027 ; AVX-NEXT:    subl %ecx, %eax
   1028 ; AVX-NEXT:    vmovd %xmm1, %ecx
   1029 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
   1030 ; AVX-NEXT:    shrq $32, %rdx
   1031 ; AVX-NEXT:    movl %ecx, %esi
   1032 ; AVX-NEXT:    subl %edx, %esi
   1033 ; AVX-NEXT:    shrl %esi
   1034 ; AVX-NEXT:    addl %edx, %esi
   1035 ; AVX-NEXT:    shrl $2, %esi
   1036 ; AVX-NEXT:    leal (,%rsi,8), %edx
   1037 ; AVX-NEXT:    subl %esi, %edx
   1038 ; AVX-NEXT:    subl %edx, %ecx
   1039 ; AVX-NEXT:    vmovd %ecx, %xmm2
   1040 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
   1041 ; AVX-NEXT:    vpextrd $2, %xmm1, %eax
   1042 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1043 ; AVX-NEXT:    shrq $32, %rcx
   1044 ; AVX-NEXT:    movl %eax, %edx
   1045 ; AVX-NEXT:    subl %ecx, %edx
   1046 ; AVX-NEXT:    shrl %edx
   1047 ; AVX-NEXT:    addl %ecx, %edx
   1048 ; AVX-NEXT:    shrl $2, %edx
   1049 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1050 ; AVX-NEXT:    subl %edx, %ecx
   1051 ; AVX-NEXT:    subl %ecx, %eax
   1052 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
   1053 ; AVX-NEXT:    vpextrd $3, %xmm1, %eax
   1054 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1055 ; AVX-NEXT:    shrq $32, %rcx
   1056 ; AVX-NEXT:    movl %eax, %edx
   1057 ; AVX-NEXT:    subl %ecx, %edx
   1058 ; AVX-NEXT:    shrl %edx
   1059 ; AVX-NEXT:    addl %ecx, %edx
   1060 ; AVX-NEXT:    shrl $2, %edx
   1061 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1062 ; AVX-NEXT:    subl %edx, %ecx
   1063 ; AVX-NEXT:    subl %ecx, %eax
   1064 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
   1065 ; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
   1066 ; AVX-NEXT:    vpextrd $1, %xmm2, %eax
   1067 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1068 ; AVX-NEXT:    shrq $32, %rcx
   1069 ; AVX-NEXT:    movl %eax, %edx
   1070 ; AVX-NEXT:    subl %ecx, %edx
   1071 ; AVX-NEXT:    shrl %edx
   1072 ; AVX-NEXT:    addl %ecx, %edx
   1073 ; AVX-NEXT:    shrl $2, %edx
   1074 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1075 ; AVX-NEXT:    subl %edx, %ecx
   1076 ; AVX-NEXT:    subl %ecx, %eax
   1077 ; AVX-NEXT:    vmovd %xmm2, %ecx
   1078 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
   1079 ; AVX-NEXT:    shrq $32, %rdx
   1080 ; AVX-NEXT:    movl %ecx, %esi
   1081 ; AVX-NEXT:    subl %edx, %esi
   1082 ; AVX-NEXT:    shrl %esi
   1083 ; AVX-NEXT:    addl %edx, %esi
   1084 ; AVX-NEXT:    shrl $2, %esi
   1085 ; AVX-NEXT:    leal (,%rsi,8), %edx
   1086 ; AVX-NEXT:    subl %esi, %edx
   1087 ; AVX-NEXT:    subl %edx, %ecx
   1088 ; AVX-NEXT:    vmovd %ecx, %xmm3
   1089 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
   1090 ; AVX-NEXT:    vpextrd $2, %xmm2, %eax
   1091 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1092 ; AVX-NEXT:    shrq $32, %rcx
   1093 ; AVX-NEXT:    movl %eax, %edx
   1094 ; AVX-NEXT:    subl %ecx, %edx
   1095 ; AVX-NEXT:    shrl %edx
   1096 ; AVX-NEXT:    addl %ecx, %edx
   1097 ; AVX-NEXT:    shrl $2, %edx
   1098 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1099 ; AVX-NEXT:    subl %edx, %ecx
   1100 ; AVX-NEXT:    subl %ecx, %eax
   1101 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
   1102 ; AVX-NEXT:    vpextrd $3, %xmm2, %eax
   1103 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1104 ; AVX-NEXT:    shrq $32, %rcx
   1105 ; AVX-NEXT:    movl %eax, %edx
   1106 ; AVX-NEXT:    subl %ecx, %edx
   1107 ; AVX-NEXT:    shrl %edx
   1108 ; AVX-NEXT:    addl %ecx, %edx
   1109 ; AVX-NEXT:    shrl $2, %edx
   1110 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1111 ; AVX-NEXT:    subl %edx, %ecx
   1112 ; AVX-NEXT:    subl %ecx, %eax
   1113 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
   1114 ; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
   1115 ; AVX-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
   1116 ; AVX-NEXT:    vpextrd $1, %xmm2, %eax
   1117 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1118 ; AVX-NEXT:    shrq $32, %rcx
   1119 ; AVX-NEXT:    movl %eax, %edx
   1120 ; AVX-NEXT:    subl %ecx, %edx
   1121 ; AVX-NEXT:    shrl %edx
   1122 ; AVX-NEXT:    addl %ecx, %edx
   1123 ; AVX-NEXT:    shrl $2, %edx
   1124 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1125 ; AVX-NEXT:    subl %edx, %ecx
   1126 ; AVX-NEXT:    subl %ecx, %eax
   1127 ; AVX-NEXT:    vmovd %xmm2, %ecx
   1128 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
   1129 ; AVX-NEXT:    shrq $32, %rdx
   1130 ; AVX-NEXT:    movl %ecx, %esi
   1131 ; AVX-NEXT:    subl %edx, %esi
   1132 ; AVX-NEXT:    shrl %esi
   1133 ; AVX-NEXT:    addl %edx, %esi
   1134 ; AVX-NEXT:    shrl $2, %esi
   1135 ; AVX-NEXT:    leal (,%rsi,8), %edx
   1136 ; AVX-NEXT:    subl %esi, %edx
   1137 ; AVX-NEXT:    subl %edx, %ecx
   1138 ; AVX-NEXT:    vmovd %ecx, %xmm3
   1139 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
   1140 ; AVX-NEXT:    vpextrd $2, %xmm2, %eax
   1141 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1142 ; AVX-NEXT:    shrq $32, %rcx
   1143 ; AVX-NEXT:    movl %eax, %edx
   1144 ; AVX-NEXT:    subl %ecx, %edx
   1145 ; AVX-NEXT:    shrl %edx
   1146 ; AVX-NEXT:    addl %ecx, %edx
   1147 ; AVX-NEXT:    shrl $2, %edx
   1148 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1149 ; AVX-NEXT:    subl %edx, %ecx
   1150 ; AVX-NEXT:    subl %ecx, %eax
   1151 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
   1152 ; AVX-NEXT:    vpextrd $3, %xmm2, %eax
   1153 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1154 ; AVX-NEXT:    shrq $32, %rcx
   1155 ; AVX-NEXT:    movl %eax, %edx
   1156 ; AVX-NEXT:    subl %ecx, %edx
   1157 ; AVX-NEXT:    shrl %edx
   1158 ; AVX-NEXT:    addl %ecx, %edx
   1159 ; AVX-NEXT:    shrl $2, %edx
   1160 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1161 ; AVX-NEXT:    subl %edx, %ecx
   1162 ; AVX-NEXT:    subl %ecx, %eax
   1163 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
   1164 ; AVX-NEXT:    vpextrd $1, %xmm0, %eax
   1165 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1166 ; AVX-NEXT:    shrq $32, %rcx
   1167 ; AVX-NEXT:    movl %eax, %edx
   1168 ; AVX-NEXT:    subl %ecx, %edx
   1169 ; AVX-NEXT:    shrl %edx
   1170 ; AVX-NEXT:    addl %ecx, %edx
   1171 ; AVX-NEXT:    shrl $2, %edx
   1172 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1173 ; AVX-NEXT:    subl %edx, %ecx
   1174 ; AVX-NEXT:    subl %ecx, %eax
   1175 ; AVX-NEXT:    vmovd %xmm0, %ecx
   1176 ; AVX-NEXT:    imulq $613566757, %rcx, %rdx # imm = 0x24924925
   1177 ; AVX-NEXT:    shrq $32, %rdx
   1178 ; AVX-NEXT:    movl %ecx, %esi
   1179 ; AVX-NEXT:    subl %edx, %esi
   1180 ; AVX-NEXT:    shrl %esi
   1181 ; AVX-NEXT:    addl %edx, %esi
   1182 ; AVX-NEXT:    shrl $2, %esi
   1183 ; AVX-NEXT:    leal (,%rsi,8), %edx
   1184 ; AVX-NEXT:    subl %esi, %edx
   1185 ; AVX-NEXT:    subl %edx, %ecx
   1186 ; AVX-NEXT:    vmovd %ecx, %xmm3
   1187 ; AVX-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
   1188 ; AVX-NEXT:    vpextrd $2, %xmm0, %eax
   1189 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1190 ; AVX-NEXT:    shrq $32, %rcx
   1191 ; AVX-NEXT:    movl %eax, %edx
   1192 ; AVX-NEXT:    subl %ecx, %edx
   1193 ; AVX-NEXT:    shrl %edx
   1194 ; AVX-NEXT:    addl %ecx, %edx
   1195 ; AVX-NEXT:    shrl $2, %edx
   1196 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1197 ; AVX-NEXT:    subl %edx, %ecx
   1198 ; AVX-NEXT:    subl %ecx, %eax
   1199 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
   1200 ; AVX-NEXT:    vpextrd $3, %xmm0, %eax
   1201 ; AVX-NEXT:    imulq $613566757, %rax, %rcx # imm = 0x24924925
   1202 ; AVX-NEXT:    shrq $32, %rcx
   1203 ; AVX-NEXT:    movl %eax, %edx
   1204 ; AVX-NEXT:    subl %ecx, %edx
   1205 ; AVX-NEXT:    shrl %edx
   1206 ; AVX-NEXT:    addl %ecx, %edx
   1207 ; AVX-NEXT:    shrl $2, %edx
   1208 ; AVX-NEXT:    leal (,%rdx,8), %ecx
   1209 ; AVX-NEXT:    subl %edx, %ecx
   1210 ; AVX-NEXT:    subl %ecx, %eax
   1211 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
   1212 ; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   1213 ; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
   1214 ; AVX-NEXT:    retq
   1215   %res = urem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   1216   ret <16 x i32> %res
   1217 }
   1218 
   ; test_rem7_32i16: unsigned remainder of every lane of a <32 x i16> vector
   ; by the constant 7. Unlike the i64/i32 variants the expected code stays
   ; vectorized, and the two llc invocations have separate expectations:
   ;   - under the AVX512F prefix the value is handled as two 256-bit halves
   ;     (ymm0 and ymm1), sharing one splat-9363 and one splat-7 constant
   ;     register between the halves;
   ;   - under the AVX512BW prefix the same sequence runs once on a single zmm
   ;     register, with both constants taken as RIP-relative memory operands.
   ; In both cases the sequence is: vpmulhuw by the magic constant, then
   ; vpsubw / vpsrlw 1 / vpaddw / vpsrlw 2 to form the quotient, then vpmullw
   ; by 7 and a final vpsubw from the original value to obtain the remainder.
   ; The check lines below are autogenerated (see the note on the first line
   ; of the file); regenerate them instead of editing by hand.
   1219 define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind {
   1220 ; AVX512F-LABEL: test_rem7_32i16:
   1221 ; AVX512F:       # BB#0:
   1222 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363]
   1223 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm3
   1224 ; AVX512F-NEXT:    vpsubw %ymm3, %ymm0, %ymm4
   1225 ; AVX512F-NEXT:    vpsrlw $1, %ymm4, %ymm4
   1226 ; AVX512F-NEXT:    vpaddw %ymm3, %ymm4, %ymm3
   1227 ; AVX512F-NEXT:    vpsrlw $2, %ymm3, %ymm3
   1228 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
   1229 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
   1230 ; AVX512F-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
   1231 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm2
   1232 ; AVX512F-NEXT:    vpsubw %ymm2, %ymm1, %ymm3
   1233 ; AVX512F-NEXT:    vpsrlw $1, %ymm3, %ymm3
   1234 ; AVX512F-NEXT:    vpaddw %ymm2, %ymm3, %ymm2
   1235 ; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
   1236 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm2, %ymm2
   1237 ; AVX512F-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
   1238 ; AVX512F-NEXT:    retq
   1239 ;
   1240 ; AVX512BW-LABEL: test_rem7_32i16:
   1241 ; AVX512BW:       # BB#0:
   1242 ; AVX512BW-NEXT:    vpmulhuw {{.*}}(%rip), %zmm0, %zmm1
   1243 ; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm2
   1244 ; AVX512BW-NEXT:    vpsrlw $1, %zmm2, %zmm2
   1245 ; AVX512BW-NEXT:    vpaddw %zmm1, %zmm2, %zmm1
   1246 ; AVX512BW-NEXT:    vpsrlw $2, %zmm1, %zmm1
   1247 ; AVX512BW-NEXT:    vpmullw {{.*}}(%rip), %zmm1, %zmm1
   1248 ; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
   1249 ; AVX512BW-NEXT:    retq
   1250   %res = urem <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
   1251   ret <32 x i16> %res
   1252 }
   1253 
   1254 define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
   1255 ; AVX512F-LABEL: test_rem7_64i8:
   1256 ; AVX512F:       # BB#0:
   1257 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
   1258 ; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm2
   1259 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
   1260 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
   1261 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
   1262 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm4, %ymm4
   1263 ; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm5
   1264 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
   1265 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1266 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
   1267 ; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
   1268 ; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm6 = ymm3[2,3],ymm5[2,3]
   1269 ; AVX512F-NEXT:    vinserti128 $1, %xmm5, %ymm3, %ymm3
   1270 ; AVX512F-NEXT:    vpackuswb %ymm6, %ymm3, %ymm3
   1271 ; AVX512F-NEXT:    vpsubb %ymm3, %ymm0, %ymm5
   1272 ; AVX512F-NEXT:    vpsrlw $1, %ymm5, %ymm6
   1273 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
   1274 ; AVX512F-NEXT:    vpand %ymm5, %ymm6, %ymm6
   1275 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm6, %ymm3
   1276 ; AVX512F-NEXT:    vpsrlw $2, %ymm3, %ymm3
   1277 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
   1278 ; AVX512F-NEXT:    vpand %ymm6, %ymm3, %ymm7
   1279 ; AVX512F-NEXT:    vpmovsxbw %xmm7, %ymm8
   1280 ; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
   1281 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm8, %ymm8
   1282 ; AVX512F-NEXT:    vpmovsxwd %ymm8, %zmm8
   1283 ; AVX512F-NEXT:    vpmovdb %zmm8, %xmm8
   1284 ; AVX512F-NEXT:    vextracti128 $1, %ymm7, %xmm7
   1285 ; AVX512F-NEXT:    vpmovsxbw %xmm7, %ymm7
   1286 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm7, %ymm7
   1287 ; AVX512F-NEXT:    vpmovsxwd %ymm7, %zmm7
   1288 ; AVX512F-NEXT:    vpmovdb %zmm7, %xmm7
   1289 ; AVX512F-NEXT:    vinserti128 $1, %xmm7, %ymm8, %ymm7
   1290 ; AVX512F-NEXT:    vpsubb %ymm7, %ymm0, %ymm0
   1291 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm7
   1292 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
   1293 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm7, %ymm2
   1294 ; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
   1295 ; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
   1296 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm7, %ymm4
   1297 ; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
   1298 ; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm7 = ymm4[2,3],ymm2[2,3]
   1299 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm2
   1300 ; AVX512F-NEXT:    vpackuswb %ymm7, %ymm2, %ymm2
   1301 ; AVX512F-NEXT:    vpsubb %ymm2, %ymm1, %ymm4
   1302 ; AVX512F-NEXT:    vpsrlw $1, %ymm4, %ymm4
   1303 ; AVX512F-NEXT:    vpand %ymm5, %ymm4, %ymm4
   1304 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm4, %ymm2
   1305 ; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
   1306 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
   1307 ; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm4
   1308 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm4, %ymm4
   1309 ; AVX512F-NEXT:    vpmovsxwd %ymm4, %zmm4
   1310 ; AVX512F-NEXT:    vpmovdb %zmm4, %xmm4
   1311 ; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
   1312 ; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
   1313 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
   1314 ; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
   1315 ; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
   1316 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm2
   1317 ; AVX512F-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
   1318 ; AVX512F-NEXT:    retq
   1319 ;
   1320 ; AVX512BW-LABEL: test_rem7_64i8:
   1321 ; AVX512BW:       # BB#0:
   1322 ; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
   1323 ; AVX512BW-NEXT:    vpextrb $1, %xmm1, %edx
   1324 ; AVX512BW-NEXT:    imull $37, %edx, %ecx
   1325 ; AVX512BW-NEXT:    shrl $8, %ecx
   1326 ; AVX512BW-NEXT:    movl %edx, %eax
   1327 ; AVX512BW-NEXT:    subb %cl, %al
   1328 ; AVX512BW-NEXT:    shrb %al
   1329 ; AVX512BW-NEXT:    addb %cl, %al
   1330 ; AVX512BW-NEXT:    shrb $2, %al
   1331 ; AVX512BW-NEXT:    movb $7, %cl
   1332 ; AVX512BW-NEXT:    mulb %cl
   1333 ; AVX512BW-NEXT:    subb %al, %dl
   1334 ; AVX512BW-NEXT:    movzbl %dl, %edx
   1335 ; AVX512BW-NEXT:    vpextrb $0, %xmm1, %esi
   1336 ; AVX512BW-NEXT:    imull $37, %esi, %edi
   1337 ; AVX512BW-NEXT:    shrl $8, %edi
   1338 ; AVX512BW-NEXT:    movl %esi, %eax
   1339 ; AVX512BW-NEXT:    subb %dil, %al
   1340 ; AVX512BW-NEXT:    shrb %al
   1341 ; AVX512BW-NEXT:    addb %dil, %al
   1342 ; AVX512BW-NEXT:    shrb $2, %al
   1343 ; AVX512BW-NEXT:    mulb %cl
   1344 ; AVX512BW-NEXT:    subb %al, %sil
   1345 ; AVX512BW-NEXT:    movzbl %sil, %eax
   1346 ; AVX512BW-NEXT:    vmovd %eax, %xmm2
   1347 ; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
   1348 ; AVX512BW-NEXT:    vpextrb $2, %xmm1, %edx
   1349 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1350 ; AVX512BW-NEXT:    shrl $8, %esi
   1351 ; AVX512BW-NEXT:    movl %edx, %eax
   1352 ; AVX512BW-NEXT:    subb %sil, %al
   1353 ; AVX512BW-NEXT:    shrb %al
   1354 ; AVX512BW-NEXT:    addb %sil, %al
   1355 ; AVX512BW-NEXT:    shrb $2, %al
   1356 ; AVX512BW-NEXT:    mulb %cl
   1357 ; AVX512BW-NEXT:    subb %al, %dl
   1358 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1359 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
   1360 ; AVX512BW-NEXT:    vpextrb $3, %xmm1, %edx
   1361 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1362 ; AVX512BW-NEXT:    shrl $8, %esi
   1363 ; AVX512BW-NEXT:    movl %edx, %eax
   1364 ; AVX512BW-NEXT:    subb %sil, %al
   1365 ; AVX512BW-NEXT:    shrb %al
   1366 ; AVX512BW-NEXT:    addb %sil, %al
   1367 ; AVX512BW-NEXT:    shrb $2, %al
   1368 ; AVX512BW-NEXT:    mulb %cl
   1369 ; AVX512BW-NEXT:    subb %al, %dl
   1370 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1371 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
   1372 ; AVX512BW-NEXT:    vpextrb $4, %xmm1, %edx
   1373 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1374 ; AVX512BW-NEXT:    shrl $8, %esi
   1375 ; AVX512BW-NEXT:    movl %edx, %eax
   1376 ; AVX512BW-NEXT:    subb %sil, %al
   1377 ; AVX512BW-NEXT:    shrb %al
   1378 ; AVX512BW-NEXT:    addb %sil, %al
   1379 ; AVX512BW-NEXT:    shrb $2, %al
   1380 ; AVX512BW-NEXT:    mulb %cl
   1381 ; AVX512BW-NEXT:    subb %al, %dl
   1382 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1383 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
   1384 ; AVX512BW-NEXT:    vpextrb $5, %xmm1, %edx
   1385 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1386 ; AVX512BW-NEXT:    shrl $8, %esi
   1387 ; AVX512BW-NEXT:    movl %edx, %eax
   1388 ; AVX512BW-NEXT:    subb %sil, %al
   1389 ; AVX512BW-NEXT:    shrb %al
   1390 ; AVX512BW-NEXT:    addb %sil, %al
   1391 ; AVX512BW-NEXT:    shrb $2, %al
   1392 ; AVX512BW-NEXT:    mulb %cl
   1393 ; AVX512BW-NEXT:    subb %al, %dl
   1394 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1395 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
   1396 ; AVX512BW-NEXT:    vpextrb $6, %xmm1, %edx
   1397 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1398 ; AVX512BW-NEXT:    shrl $8, %esi
   1399 ; AVX512BW-NEXT:    movl %edx, %eax
   1400 ; AVX512BW-NEXT:    subb %sil, %al
   1401 ; AVX512BW-NEXT:    shrb %al
   1402 ; AVX512BW-NEXT:    addb %sil, %al
   1403 ; AVX512BW-NEXT:    shrb $2, %al
   1404 ; AVX512BW-NEXT:    mulb %cl
   1405 ; AVX512BW-NEXT:    subb %al, %dl
   1406 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1407 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
   1408 ; AVX512BW-NEXT:    vpextrb $7, %xmm1, %edx
   1409 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1410 ; AVX512BW-NEXT:    shrl $8, %esi
   1411 ; AVX512BW-NEXT:    movl %edx, %eax
   1412 ; AVX512BW-NEXT:    subb %sil, %al
   1413 ; AVX512BW-NEXT:    shrb %al
   1414 ; AVX512BW-NEXT:    addb %sil, %al
   1415 ; AVX512BW-NEXT:    shrb $2, %al
   1416 ; AVX512BW-NEXT:    mulb %cl
   1417 ; AVX512BW-NEXT:    subb %al, %dl
   1418 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1419 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
   1420 ; AVX512BW-NEXT:    vpextrb $8, %xmm1, %edx
   1421 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1422 ; AVX512BW-NEXT:    shrl $8, %esi
   1423 ; AVX512BW-NEXT:    movl %edx, %eax
   1424 ; AVX512BW-NEXT:    subb %sil, %al
   1425 ; AVX512BW-NEXT:    shrb %al
   1426 ; AVX512BW-NEXT:    addb %sil, %al
   1427 ; AVX512BW-NEXT:    shrb $2, %al
   1428 ; AVX512BW-NEXT:    mulb %cl
   1429 ; AVX512BW-NEXT:    subb %al, %dl
   1430 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1431 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
   1432 ; AVX512BW-NEXT:    vpextrb $9, %xmm1, %edx
   1433 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1434 ; AVX512BW-NEXT:    shrl $8, %esi
   1435 ; AVX512BW-NEXT:    movl %edx, %eax
   1436 ; AVX512BW-NEXT:    subb %sil, %al
   1437 ; AVX512BW-NEXT:    shrb %al
   1438 ; AVX512BW-NEXT:    addb %sil, %al
   1439 ; AVX512BW-NEXT:    shrb $2, %al
   1440 ; AVX512BW-NEXT:    mulb %cl
   1441 ; AVX512BW-NEXT:    subb %al, %dl
   1442 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1443 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
   1444 ; AVX512BW-NEXT:    vpextrb $10, %xmm1, %edx
   1445 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1446 ; AVX512BW-NEXT:    shrl $8, %esi
   1447 ; AVX512BW-NEXT:    movl %edx, %eax
   1448 ; AVX512BW-NEXT:    subb %sil, %al
   1449 ; AVX512BW-NEXT:    shrb %al
   1450 ; AVX512BW-NEXT:    addb %sil, %al
   1451 ; AVX512BW-NEXT:    shrb $2, %al
   1452 ; AVX512BW-NEXT:    mulb %cl
   1453 ; AVX512BW-NEXT:    subb %al, %dl
   1454 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1455 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
   1456 ; AVX512BW-NEXT:    vpextrb $11, %xmm1, %edx
   1457 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1458 ; AVX512BW-NEXT:    shrl $8, %esi
   1459 ; AVX512BW-NEXT:    movl %edx, %eax
   1460 ; AVX512BW-NEXT:    subb %sil, %al
   1461 ; AVX512BW-NEXT:    shrb %al
   1462 ; AVX512BW-NEXT:    addb %sil, %al
   1463 ; AVX512BW-NEXT:    shrb $2, %al
   1464 ; AVX512BW-NEXT:    mulb %cl
   1465 ; AVX512BW-NEXT:    subb %al, %dl
   1466 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1467 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
   1468 ; AVX512BW-NEXT:    vpextrb $12, %xmm1, %edx
   1469 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1470 ; AVX512BW-NEXT:    shrl $8, %esi
   1471 ; AVX512BW-NEXT:    movl %edx, %eax
   1472 ; AVX512BW-NEXT:    subb %sil, %al
   1473 ; AVX512BW-NEXT:    shrb %al
   1474 ; AVX512BW-NEXT:    addb %sil, %al
   1475 ; AVX512BW-NEXT:    shrb $2, %al
   1476 ; AVX512BW-NEXT:    mulb %cl
   1477 ; AVX512BW-NEXT:    subb %al, %dl
   1478 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1479 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
   1480 ; AVX512BW-NEXT:    vpextrb $13, %xmm1, %edx
   1481 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1482 ; AVX512BW-NEXT:    shrl $8, %esi
   1483 ; AVX512BW-NEXT:    movl %edx, %eax
   1484 ; AVX512BW-NEXT:    subb %sil, %al
   1485 ; AVX512BW-NEXT:    shrb %al
   1486 ; AVX512BW-NEXT:    addb %sil, %al
   1487 ; AVX512BW-NEXT:    shrb $2, %al
   1488 ; AVX512BW-NEXT:    mulb %cl
   1489 ; AVX512BW-NEXT:    subb %al, %dl
   1490 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1491 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
   1492 ; AVX512BW-NEXT:    vpextrb $14, %xmm1, %edx
   1493 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1494 ; AVX512BW-NEXT:    shrl $8, %esi
   1495 ; AVX512BW-NEXT:    movl %edx, %eax
   1496 ; AVX512BW-NEXT:    subb %sil, %al
   1497 ; AVX512BW-NEXT:    shrb %al
   1498 ; AVX512BW-NEXT:    addb %sil, %al
   1499 ; AVX512BW-NEXT:    shrb $2, %al
   1500 ; AVX512BW-NEXT:    mulb %cl
   1501 ; AVX512BW-NEXT:    subb %al, %dl
   1502 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1503 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
   1504 ; AVX512BW-NEXT:    vpextrb $15, %xmm1, %edx
   1505 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1506 ; AVX512BW-NEXT:    shrl $8, %esi
   1507 ; AVX512BW-NEXT:    movl %edx, %eax
   1508 ; AVX512BW-NEXT:    subb %sil, %al
   1509 ; AVX512BW-NEXT:    shrb %al
   1510 ; AVX512BW-NEXT:    addb %sil, %al
   1511 ; AVX512BW-NEXT:    shrb $2, %al
   1512 ; AVX512BW-NEXT:    mulb %cl
   1513 ; AVX512BW-NEXT:    subb %al, %dl
   1514 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1515 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
   1516 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
   1517 ; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
   1518 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1519 ; AVX512BW-NEXT:    shrl $8, %esi
   1520 ; AVX512BW-NEXT:    movl %edx, %eax
   1521 ; AVX512BW-NEXT:    subb %sil, %al
   1522 ; AVX512BW-NEXT:    shrb %al
   1523 ; AVX512BW-NEXT:    addb %sil, %al
   1524 ; AVX512BW-NEXT:    shrb $2, %al
   1525 ; AVX512BW-NEXT:    mulb %cl
   1526 ; AVX512BW-NEXT:    subb %al, %dl
   1527 ; AVX512BW-NEXT:    movzbl %dl, %edx
   1528 ; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
   1529 ; AVX512BW-NEXT:    imull $37, %esi, %edi
   1530 ; AVX512BW-NEXT:    shrl $8, %edi
   1531 ; AVX512BW-NEXT:    movl %esi, %eax
   1532 ; AVX512BW-NEXT:    subb %dil, %al
   1533 ; AVX512BW-NEXT:    shrb %al
   1534 ; AVX512BW-NEXT:    addb %dil, %al
   1535 ; AVX512BW-NEXT:    shrb $2, %al
   1536 ; AVX512BW-NEXT:    mulb %cl
   1537 ; AVX512BW-NEXT:    subb %al, %sil
   1538 ; AVX512BW-NEXT:    movzbl %sil, %eax
   1539 ; AVX512BW-NEXT:    vmovd %eax, %xmm3
   1540 ; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
   1541 ; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
   1542 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1543 ; AVX512BW-NEXT:    shrl $8, %esi
   1544 ; AVX512BW-NEXT:    movl %edx, %eax
   1545 ; AVX512BW-NEXT:    subb %sil, %al
   1546 ; AVX512BW-NEXT:    shrb %al
   1547 ; AVX512BW-NEXT:    addb %sil, %al
   1548 ; AVX512BW-NEXT:    shrb $2, %al
   1549 ; AVX512BW-NEXT:    mulb %cl
   1550 ; AVX512BW-NEXT:    subb %al, %dl
   1551 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1552 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
   1553 ; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
   1554 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1555 ; AVX512BW-NEXT:    shrl $8, %esi
   1556 ; AVX512BW-NEXT:    movl %edx, %eax
   1557 ; AVX512BW-NEXT:    subb %sil, %al
   1558 ; AVX512BW-NEXT:    shrb %al
   1559 ; AVX512BW-NEXT:    addb %sil, %al
   1560 ; AVX512BW-NEXT:    shrb $2, %al
   1561 ; AVX512BW-NEXT:    mulb %cl
   1562 ; AVX512BW-NEXT:    subb %al, %dl
   1563 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1564 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
   1565 ; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
   1566 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1567 ; AVX512BW-NEXT:    shrl $8, %esi
   1568 ; AVX512BW-NEXT:    movl %edx, %eax
   1569 ; AVX512BW-NEXT:    subb %sil, %al
   1570 ; AVX512BW-NEXT:    shrb %al
   1571 ; AVX512BW-NEXT:    addb %sil, %al
   1572 ; AVX512BW-NEXT:    shrb $2, %al
   1573 ; AVX512BW-NEXT:    mulb %cl
   1574 ; AVX512BW-NEXT:    subb %al, %dl
   1575 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1576 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
   1577 ; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
   1578 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1579 ; AVX512BW-NEXT:    shrl $8, %esi
   1580 ; AVX512BW-NEXT:    movl %edx, %eax
   1581 ; AVX512BW-NEXT:    subb %sil, %al
   1582 ; AVX512BW-NEXT:    shrb %al
   1583 ; AVX512BW-NEXT:    addb %sil, %al
   1584 ; AVX512BW-NEXT:    shrb $2, %al
   1585 ; AVX512BW-NEXT:    mulb %cl
   1586 ; AVX512BW-NEXT:    subb %al, %dl
   1587 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1588 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
   1589 ; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
   1590 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1591 ; AVX512BW-NEXT:    shrl $8, %esi
   1592 ; AVX512BW-NEXT:    movl %edx, %eax
   1593 ; AVX512BW-NEXT:    subb %sil, %al
   1594 ; AVX512BW-NEXT:    shrb %al
   1595 ; AVX512BW-NEXT:    addb %sil, %al
   1596 ; AVX512BW-NEXT:    shrb $2, %al
   1597 ; AVX512BW-NEXT:    mulb %cl
   1598 ; AVX512BW-NEXT:    subb %al, %dl
   1599 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1600 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
   1601 ; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
   1602 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1603 ; AVX512BW-NEXT:    shrl $8, %esi
   1604 ; AVX512BW-NEXT:    movl %edx, %eax
   1605 ; AVX512BW-NEXT:    subb %sil, %al
   1606 ; AVX512BW-NEXT:    shrb %al
   1607 ; AVX512BW-NEXT:    addb %sil, %al
   1608 ; AVX512BW-NEXT:    shrb $2, %al
   1609 ; AVX512BW-NEXT:    mulb %cl
   1610 ; AVX512BW-NEXT:    subb %al, %dl
   1611 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1612 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
   1613 ; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
   1614 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1615 ; AVX512BW-NEXT:    shrl $8, %esi
   1616 ; AVX512BW-NEXT:    movl %edx, %eax
   1617 ; AVX512BW-NEXT:    subb %sil, %al
   1618 ; AVX512BW-NEXT:    shrb %al
   1619 ; AVX512BW-NEXT:    addb %sil, %al
   1620 ; AVX512BW-NEXT:    shrb $2, %al
   1621 ; AVX512BW-NEXT:    mulb %cl
   1622 ; AVX512BW-NEXT:    subb %al, %dl
   1623 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1624 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
   1625 ; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
   1626 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1627 ; AVX512BW-NEXT:    shrl $8, %esi
   1628 ; AVX512BW-NEXT:    movl %edx, %eax
   1629 ; AVX512BW-NEXT:    subb %sil, %al
   1630 ; AVX512BW-NEXT:    shrb %al
   1631 ; AVX512BW-NEXT:    addb %sil, %al
   1632 ; AVX512BW-NEXT:    shrb $2, %al
   1633 ; AVX512BW-NEXT:    mulb %cl
   1634 ; AVX512BW-NEXT:    subb %al, %dl
   1635 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1636 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
   1637 ; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
   1638 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1639 ; AVX512BW-NEXT:    shrl $8, %esi
   1640 ; AVX512BW-NEXT:    movl %edx, %eax
   1641 ; AVX512BW-NEXT:    subb %sil, %al
   1642 ; AVX512BW-NEXT:    shrb %al
   1643 ; AVX512BW-NEXT:    addb %sil, %al
   1644 ; AVX512BW-NEXT:    shrb $2, %al
   1645 ; AVX512BW-NEXT:    mulb %cl
   1646 ; AVX512BW-NEXT:    subb %al, %dl
   1647 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1648 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
   1649 ; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
   1650 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1651 ; AVX512BW-NEXT:    shrl $8, %esi
   1652 ; AVX512BW-NEXT:    movl %edx, %eax
   1653 ; AVX512BW-NEXT:    subb %sil, %al
   1654 ; AVX512BW-NEXT:    shrb %al
   1655 ; AVX512BW-NEXT:    addb %sil, %al
   1656 ; AVX512BW-NEXT:    shrb $2, %al
   1657 ; AVX512BW-NEXT:    mulb %cl
   1658 ; AVX512BW-NEXT:    subb %al, %dl
   1659 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1660 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
   1661 ; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
   1662 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1663 ; AVX512BW-NEXT:    shrl $8, %esi
   1664 ; AVX512BW-NEXT:    movl %edx, %eax
   1665 ; AVX512BW-NEXT:    subb %sil, %al
   1666 ; AVX512BW-NEXT:    shrb %al
   1667 ; AVX512BW-NEXT:    addb %sil, %al
   1668 ; AVX512BW-NEXT:    shrb $2, %al
   1669 ; AVX512BW-NEXT:    mulb %cl
   1670 ; AVX512BW-NEXT:    subb %al, %dl
   1671 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1672 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
   1673 ; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
   1674 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1675 ; AVX512BW-NEXT:    shrl $8, %esi
   1676 ; AVX512BW-NEXT:    movl %edx, %eax
   1677 ; AVX512BW-NEXT:    subb %sil, %al
   1678 ; AVX512BW-NEXT:    shrb %al
   1679 ; AVX512BW-NEXT:    addb %sil, %al
   1680 ; AVX512BW-NEXT:    shrb $2, %al
   1681 ; AVX512BW-NEXT:    mulb %cl
   1682 ; AVX512BW-NEXT:    subb %al, %dl
   1683 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1684 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
   1685 ; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
   1686 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1687 ; AVX512BW-NEXT:    shrl $8, %esi
   1688 ; AVX512BW-NEXT:    movl %edx, %eax
   1689 ; AVX512BW-NEXT:    subb %sil, %al
   1690 ; AVX512BW-NEXT:    shrb %al
   1691 ; AVX512BW-NEXT:    addb %sil, %al
   1692 ; AVX512BW-NEXT:    shrb $2, %al
   1693 ; AVX512BW-NEXT:    mulb %cl
   1694 ; AVX512BW-NEXT:    subb %al, %dl
   1695 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1696 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
   1697 ; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
   1698 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1699 ; AVX512BW-NEXT:    shrl $8, %esi
   1700 ; AVX512BW-NEXT:    movl %edx, %eax
   1701 ; AVX512BW-NEXT:    subb %sil, %al
   1702 ; AVX512BW-NEXT:    shrb %al
   1703 ; AVX512BW-NEXT:    addb %sil, %al
   1704 ; AVX512BW-NEXT:    shrb $2, %al
   1705 ; AVX512BW-NEXT:    mulb %cl
   1706 ; AVX512BW-NEXT:    subb %al, %dl
   1707 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1708 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
   1709 ; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
   1710 ; AVX512BW-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
   1711 ; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
   1712 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1713 ; AVX512BW-NEXT:    shrl $8, %esi
   1714 ; AVX512BW-NEXT:    movl %edx, %eax
   1715 ; AVX512BW-NEXT:    subb %sil, %al
   1716 ; AVX512BW-NEXT:    shrb %al
   1717 ; AVX512BW-NEXT:    addb %sil, %al
   1718 ; AVX512BW-NEXT:    shrb $2, %al
   1719 ; AVX512BW-NEXT:    mulb %cl
   1720 ; AVX512BW-NEXT:    subb %al, %dl
   1721 ; AVX512BW-NEXT:    movzbl %dl, %edx
   1722 ; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
   1723 ; AVX512BW-NEXT:    imull $37, %esi, %edi
   1724 ; AVX512BW-NEXT:    shrl $8, %edi
   1725 ; AVX512BW-NEXT:    movl %esi, %eax
   1726 ; AVX512BW-NEXT:    subb %dil, %al
   1727 ; AVX512BW-NEXT:    shrb %al
   1728 ; AVX512BW-NEXT:    addb %dil, %al
   1729 ; AVX512BW-NEXT:    shrb $2, %al
   1730 ; AVX512BW-NEXT:    mulb %cl
   1731 ; AVX512BW-NEXT:    subb %al, %sil
   1732 ; AVX512BW-NEXT:    movzbl %sil, %eax
   1733 ; AVX512BW-NEXT:    vmovd %eax, %xmm3
   1734 ; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
   1735 ; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
   1736 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1737 ; AVX512BW-NEXT:    shrl $8, %esi
   1738 ; AVX512BW-NEXT:    movl %edx, %eax
   1739 ; AVX512BW-NEXT:    subb %sil, %al
   1740 ; AVX512BW-NEXT:    shrb %al
   1741 ; AVX512BW-NEXT:    addb %sil, %al
   1742 ; AVX512BW-NEXT:    shrb $2, %al
   1743 ; AVX512BW-NEXT:    mulb %cl
   1744 ; AVX512BW-NEXT:    subb %al, %dl
   1745 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1746 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
   1747 ; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
   1748 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1749 ; AVX512BW-NEXT:    shrl $8, %esi
   1750 ; AVX512BW-NEXT:    movl %edx, %eax
   1751 ; AVX512BW-NEXT:    subb %sil, %al
   1752 ; AVX512BW-NEXT:    shrb %al
   1753 ; AVX512BW-NEXT:    addb %sil, %al
   1754 ; AVX512BW-NEXT:    shrb $2, %al
   1755 ; AVX512BW-NEXT:    mulb %cl
   1756 ; AVX512BW-NEXT:    subb %al, %dl
   1757 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1758 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
   1759 ; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
   1760 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1761 ; AVX512BW-NEXT:    shrl $8, %esi
   1762 ; AVX512BW-NEXT:    movl %edx, %eax
   1763 ; AVX512BW-NEXT:    subb %sil, %al
   1764 ; AVX512BW-NEXT:    shrb %al
   1765 ; AVX512BW-NEXT:    addb %sil, %al
   1766 ; AVX512BW-NEXT:    shrb $2, %al
   1767 ; AVX512BW-NEXT:    mulb %cl
   1768 ; AVX512BW-NEXT:    subb %al, %dl
   1769 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1770 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
   1771 ; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
   1772 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1773 ; AVX512BW-NEXT:    shrl $8, %esi
   1774 ; AVX512BW-NEXT:    movl %edx, %eax
   1775 ; AVX512BW-NEXT:    subb %sil, %al
   1776 ; AVX512BW-NEXT:    shrb %al
   1777 ; AVX512BW-NEXT:    addb %sil, %al
   1778 ; AVX512BW-NEXT:    shrb $2, %al
   1779 ; AVX512BW-NEXT:    mulb %cl
   1780 ; AVX512BW-NEXT:    subb %al, %dl
   1781 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1782 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
   1783 ; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
   1784 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1785 ; AVX512BW-NEXT:    shrl $8, %esi
   1786 ; AVX512BW-NEXT:    movl %edx, %eax
   1787 ; AVX512BW-NEXT:    subb %sil, %al
   1788 ; AVX512BW-NEXT:    shrb %al
   1789 ; AVX512BW-NEXT:    addb %sil, %al
   1790 ; AVX512BW-NEXT:    shrb $2, %al
   1791 ; AVX512BW-NEXT:    mulb %cl
   1792 ; AVX512BW-NEXT:    subb %al, %dl
   1793 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1794 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
   1795 ; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
   1796 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1797 ; AVX512BW-NEXT:    shrl $8, %esi
   1798 ; AVX512BW-NEXT:    movl %edx, %eax
   1799 ; AVX512BW-NEXT:    subb %sil, %al
   1800 ; AVX512BW-NEXT:    shrb %al
   1801 ; AVX512BW-NEXT:    addb %sil, %al
   1802 ; AVX512BW-NEXT:    shrb $2, %al
   1803 ; AVX512BW-NEXT:    mulb %cl
   1804 ; AVX512BW-NEXT:    subb %al, %dl
   1805 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1806 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
   1807 ; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
   1808 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1809 ; AVX512BW-NEXT:    shrl $8, %esi
   1810 ; AVX512BW-NEXT:    movl %edx, %eax
   1811 ; AVX512BW-NEXT:    subb %sil, %al
   1812 ; AVX512BW-NEXT:    shrb %al
   1813 ; AVX512BW-NEXT:    addb %sil, %al
   1814 ; AVX512BW-NEXT:    shrb $2, %al
   1815 ; AVX512BW-NEXT:    mulb %cl
   1816 ; AVX512BW-NEXT:    subb %al, %dl
   1817 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1818 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
   1819 ; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
   1820 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1821 ; AVX512BW-NEXT:    shrl $8, %esi
   1822 ; AVX512BW-NEXT:    movl %edx, %eax
   1823 ; AVX512BW-NEXT:    subb %sil, %al
   1824 ; AVX512BW-NEXT:    shrb %al
   1825 ; AVX512BW-NEXT:    addb %sil, %al
   1826 ; AVX512BW-NEXT:    shrb $2, %al
   1827 ; AVX512BW-NEXT:    mulb %cl
   1828 ; AVX512BW-NEXT:    subb %al, %dl
   1829 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1830 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
   1831 ; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
   1832 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1833 ; AVX512BW-NEXT:    shrl $8, %esi
   1834 ; AVX512BW-NEXT:    movl %edx, %eax
   1835 ; AVX512BW-NEXT:    subb %sil, %al
   1836 ; AVX512BW-NEXT:    shrb %al
   1837 ; AVX512BW-NEXT:    addb %sil, %al
   1838 ; AVX512BW-NEXT:    shrb $2, %al
   1839 ; AVX512BW-NEXT:    mulb %cl
   1840 ; AVX512BW-NEXT:    subb %al, %dl
   1841 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1842 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
   1843 ; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
   1844 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1845 ; AVX512BW-NEXT:    shrl $8, %esi
   1846 ; AVX512BW-NEXT:    movl %edx, %eax
   1847 ; AVX512BW-NEXT:    subb %sil, %al
   1848 ; AVX512BW-NEXT:    shrb %al
   1849 ; AVX512BW-NEXT:    addb %sil, %al
   1850 ; AVX512BW-NEXT:    shrb $2, %al
   1851 ; AVX512BW-NEXT:    mulb %cl
   1852 ; AVX512BW-NEXT:    subb %al, %dl
   1853 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1854 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
   1855 ; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
   1856 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1857 ; AVX512BW-NEXT:    shrl $8, %esi
   1858 ; AVX512BW-NEXT:    movl %edx, %eax
   1859 ; AVX512BW-NEXT:    subb %sil, %al
   1860 ; AVX512BW-NEXT:    shrb %al
   1861 ; AVX512BW-NEXT:    addb %sil, %al
   1862 ; AVX512BW-NEXT:    shrb $2, %al
   1863 ; AVX512BW-NEXT:    mulb %cl
   1864 ; AVX512BW-NEXT:    subb %al, %dl
   1865 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1866 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
   1867 ; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
   1868 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1869 ; AVX512BW-NEXT:    shrl $8, %esi
   1870 ; AVX512BW-NEXT:    movl %edx, %eax
   1871 ; AVX512BW-NEXT:    subb %sil, %al
   1872 ; AVX512BW-NEXT:    shrb %al
   1873 ; AVX512BW-NEXT:    addb %sil, %al
   1874 ; AVX512BW-NEXT:    shrb $2, %al
   1875 ; AVX512BW-NEXT:    mulb %cl
   1876 ; AVX512BW-NEXT:    subb %al, %dl
   1877 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1878 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
   1879 ; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
   1880 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1881 ; AVX512BW-NEXT:    shrl $8, %esi
   1882 ; AVX512BW-NEXT:    movl %edx, %eax
   1883 ; AVX512BW-NEXT:    subb %sil, %al
   1884 ; AVX512BW-NEXT:    shrb %al
   1885 ; AVX512BW-NEXT:    addb %sil, %al
   1886 ; AVX512BW-NEXT:    shrb $2, %al
   1887 ; AVX512BW-NEXT:    mulb %cl
   1888 ; AVX512BW-NEXT:    subb %al, %dl
   1889 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1890 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
   1891 ; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
   1892 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1893 ; AVX512BW-NEXT:    shrl $8, %esi
   1894 ; AVX512BW-NEXT:    movl %edx, %eax
   1895 ; AVX512BW-NEXT:    subb %sil, %al
   1896 ; AVX512BW-NEXT:    shrb %al
   1897 ; AVX512BW-NEXT:    addb %sil, %al
   1898 ; AVX512BW-NEXT:    shrb $2, %al
   1899 ; AVX512BW-NEXT:    mulb %cl
   1900 ; AVX512BW-NEXT:    subb %al, %dl
   1901 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1902 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm2
   1903 ; AVX512BW-NEXT:    vpextrb $1, %xmm0, %edx
   1904 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1905 ; AVX512BW-NEXT:    shrl $8, %esi
   1906 ; AVX512BW-NEXT:    movl %edx, %eax
   1907 ; AVX512BW-NEXT:    subb %sil, %al
   1908 ; AVX512BW-NEXT:    shrb %al
   1909 ; AVX512BW-NEXT:    addb %sil, %al
   1910 ; AVX512BW-NEXT:    shrb $2, %al
   1911 ; AVX512BW-NEXT:    mulb %cl
   1912 ; AVX512BW-NEXT:    subb %al, %dl
   1913 ; AVX512BW-NEXT:    movzbl %dl, %edx
   1914 ; AVX512BW-NEXT:    vpextrb $0, %xmm0, %esi
   1915 ; AVX512BW-NEXT:    imull $37, %esi, %edi
   1916 ; AVX512BW-NEXT:    shrl $8, %edi
   1917 ; AVX512BW-NEXT:    movl %esi, %eax
   1918 ; AVX512BW-NEXT:    subb %dil, %al
   1919 ; AVX512BW-NEXT:    shrb %al
   1920 ; AVX512BW-NEXT:    addb %dil, %al
   1921 ; AVX512BW-NEXT:    shrb $2, %al
   1922 ; AVX512BW-NEXT:    mulb %cl
   1923 ; AVX512BW-NEXT:    subb %al, %sil
   1924 ; AVX512BW-NEXT:    movzbl %sil, %eax
   1925 ; AVX512BW-NEXT:    vmovd %eax, %xmm3
   1926 ; AVX512BW-NEXT:    vpinsrb $1, %edx, %xmm3, %xmm3
   1927 ; AVX512BW-NEXT:    vpextrb $2, %xmm0, %edx
   1928 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1929 ; AVX512BW-NEXT:    shrl $8, %esi
   1930 ; AVX512BW-NEXT:    movl %edx, %eax
   1931 ; AVX512BW-NEXT:    subb %sil, %al
   1932 ; AVX512BW-NEXT:    shrb %al
   1933 ; AVX512BW-NEXT:    addb %sil, %al
   1934 ; AVX512BW-NEXT:    shrb $2, %al
   1935 ; AVX512BW-NEXT:    mulb %cl
   1936 ; AVX512BW-NEXT:    subb %al, %dl
   1937 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1938 ; AVX512BW-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
   1939 ; AVX512BW-NEXT:    vpextrb $3, %xmm0, %edx
   1940 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1941 ; AVX512BW-NEXT:    shrl $8, %esi
   1942 ; AVX512BW-NEXT:    movl %edx, %eax
   1943 ; AVX512BW-NEXT:    subb %sil, %al
   1944 ; AVX512BW-NEXT:    shrb %al
   1945 ; AVX512BW-NEXT:    addb %sil, %al
   1946 ; AVX512BW-NEXT:    shrb $2, %al
   1947 ; AVX512BW-NEXT:    mulb %cl
   1948 ; AVX512BW-NEXT:    subb %al, %dl
   1949 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1950 ; AVX512BW-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
   1951 ; AVX512BW-NEXT:    vpextrb $4, %xmm0, %edx
   1952 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1953 ; AVX512BW-NEXT:    shrl $8, %esi
   1954 ; AVX512BW-NEXT:    movl %edx, %eax
   1955 ; AVX512BW-NEXT:    subb %sil, %al
   1956 ; AVX512BW-NEXT:    shrb %al
   1957 ; AVX512BW-NEXT:    addb %sil, %al
   1958 ; AVX512BW-NEXT:    shrb $2, %al
   1959 ; AVX512BW-NEXT:    mulb %cl
   1960 ; AVX512BW-NEXT:    subb %al, %dl
   1961 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1962 ; AVX512BW-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
   1963 ; AVX512BW-NEXT:    vpextrb $5, %xmm0, %edx
   1964 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1965 ; AVX512BW-NEXT:    shrl $8, %esi
   1966 ; AVX512BW-NEXT:    movl %edx, %eax
   1967 ; AVX512BW-NEXT:    subb %sil, %al
   1968 ; AVX512BW-NEXT:    shrb %al
   1969 ; AVX512BW-NEXT:    addb %sil, %al
   1970 ; AVX512BW-NEXT:    shrb $2, %al
   1971 ; AVX512BW-NEXT:    mulb %cl
   1972 ; AVX512BW-NEXT:    subb %al, %dl
   1973 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1974 ; AVX512BW-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
   1975 ; AVX512BW-NEXT:    vpextrb $6, %xmm0, %edx
   1976 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1977 ; AVX512BW-NEXT:    shrl $8, %esi
   1978 ; AVX512BW-NEXT:    movl %edx, %eax
   1979 ; AVX512BW-NEXT:    subb %sil, %al
   1980 ; AVX512BW-NEXT:    shrb %al
   1981 ; AVX512BW-NEXT:    addb %sil, %al
   1982 ; AVX512BW-NEXT:    shrb $2, %al
   1983 ; AVX512BW-NEXT:    mulb %cl
   1984 ; AVX512BW-NEXT:    subb %al, %dl
   1985 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1986 ; AVX512BW-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
   1987 ; AVX512BW-NEXT:    vpextrb $7, %xmm0, %edx
   1988 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   1989 ; AVX512BW-NEXT:    shrl $8, %esi
   1990 ; AVX512BW-NEXT:    movl %edx, %eax
   1991 ; AVX512BW-NEXT:    subb %sil, %al
   1992 ; AVX512BW-NEXT:    shrb %al
   1993 ; AVX512BW-NEXT:    addb %sil, %al
   1994 ; AVX512BW-NEXT:    shrb $2, %al
   1995 ; AVX512BW-NEXT:    mulb %cl
   1996 ; AVX512BW-NEXT:    subb %al, %dl
   1997 ; AVX512BW-NEXT:    movzbl %dl, %eax
   1998 ; AVX512BW-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
   1999 ; AVX512BW-NEXT:    vpextrb $8, %xmm0, %edx
   2000 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2001 ; AVX512BW-NEXT:    shrl $8, %esi
   2002 ; AVX512BW-NEXT:    movl %edx, %eax
   2003 ; AVX512BW-NEXT:    subb %sil, %al
   2004 ; AVX512BW-NEXT:    shrb %al
   2005 ; AVX512BW-NEXT:    addb %sil, %al
   2006 ; AVX512BW-NEXT:    shrb $2, %al
   2007 ; AVX512BW-NEXT:    mulb %cl
   2008 ; AVX512BW-NEXT:    subb %al, %dl
   2009 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2010 ; AVX512BW-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
   2011 ; AVX512BW-NEXT:    vpextrb $9, %xmm0, %edx
   2012 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2013 ; AVX512BW-NEXT:    shrl $8, %esi
   2014 ; AVX512BW-NEXT:    movl %edx, %eax
   2015 ; AVX512BW-NEXT:    subb %sil, %al
   2016 ; AVX512BW-NEXT:    shrb %al
   2017 ; AVX512BW-NEXT:    addb %sil, %al
   2018 ; AVX512BW-NEXT:    shrb $2, %al
   2019 ; AVX512BW-NEXT:    mulb %cl
   2020 ; AVX512BW-NEXT:    subb %al, %dl
   2021 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2022 ; AVX512BW-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
   2023 ; AVX512BW-NEXT:    vpextrb $10, %xmm0, %edx
   2024 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2025 ; AVX512BW-NEXT:    shrl $8, %esi
   2026 ; AVX512BW-NEXT:    movl %edx, %eax
   2027 ; AVX512BW-NEXT:    subb %sil, %al
   2028 ; AVX512BW-NEXT:    shrb %al
   2029 ; AVX512BW-NEXT:    addb %sil, %al
   2030 ; AVX512BW-NEXT:    shrb $2, %al
   2031 ; AVX512BW-NEXT:    mulb %cl
   2032 ; AVX512BW-NEXT:    subb %al, %dl
   2033 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2034 ; AVX512BW-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
   2035 ; AVX512BW-NEXT:    vpextrb $11, %xmm0, %edx
   2036 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2037 ; AVX512BW-NEXT:    shrl $8, %esi
   2038 ; AVX512BW-NEXT:    movl %edx, %eax
   2039 ; AVX512BW-NEXT:    subb %sil, %al
   2040 ; AVX512BW-NEXT:    shrb %al
   2041 ; AVX512BW-NEXT:    addb %sil, %al
   2042 ; AVX512BW-NEXT:    shrb $2, %al
   2043 ; AVX512BW-NEXT:    mulb %cl
   2044 ; AVX512BW-NEXT:    subb %al, %dl
   2045 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2046 ; AVX512BW-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
   2047 ; AVX512BW-NEXT:    vpextrb $12, %xmm0, %edx
   2048 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2049 ; AVX512BW-NEXT:    shrl $8, %esi
   2050 ; AVX512BW-NEXT:    movl %edx, %eax
   2051 ; AVX512BW-NEXT:    subb %sil, %al
   2052 ; AVX512BW-NEXT:    shrb %al
   2053 ; AVX512BW-NEXT:    addb %sil, %al
   2054 ; AVX512BW-NEXT:    shrb $2, %al
   2055 ; AVX512BW-NEXT:    mulb %cl
   2056 ; AVX512BW-NEXT:    subb %al, %dl
   2057 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2058 ; AVX512BW-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
   2059 ; AVX512BW-NEXT:    vpextrb $13, %xmm0, %edx
   2060 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2061 ; AVX512BW-NEXT:    shrl $8, %esi
   2062 ; AVX512BW-NEXT:    movl %edx, %eax
   2063 ; AVX512BW-NEXT:    subb %sil, %al
   2064 ; AVX512BW-NEXT:    shrb %al
   2065 ; AVX512BW-NEXT:    addb %sil, %al
   2066 ; AVX512BW-NEXT:    shrb $2, %al
   2067 ; AVX512BW-NEXT:    mulb %cl
   2068 ; AVX512BW-NEXT:    subb %al, %dl
   2069 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2070 ; AVX512BW-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
   2071 ; AVX512BW-NEXT:    vpextrb $14, %xmm0, %edx
   2072 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2073 ; AVX512BW-NEXT:    shrl $8, %esi
   2074 ; AVX512BW-NEXT:    movl %edx, %eax
   2075 ; AVX512BW-NEXT:    subb %sil, %al
   2076 ; AVX512BW-NEXT:    shrb %al
   2077 ; AVX512BW-NEXT:    addb %sil, %al
   2078 ; AVX512BW-NEXT:    shrb $2, %al
   2079 ; AVX512BW-NEXT:    mulb %cl
   2080 ; AVX512BW-NEXT:    subb %al, %dl
   2081 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2082 ; AVX512BW-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
   2083 ; AVX512BW-NEXT:    vpextrb $15, %xmm0, %edx
   2084 ; AVX512BW-NEXT:    imull $37, %edx, %esi
   2085 ; AVX512BW-NEXT:    shrl $8, %esi
   2086 ; AVX512BW-NEXT:    movl %edx, %eax
   2087 ; AVX512BW-NEXT:    subb %sil, %al
   2088 ; AVX512BW-NEXT:    shrb %al
   2089 ; AVX512BW-NEXT:    addb %sil, %al
   2090 ; AVX512BW-NEXT:    shrb $2, %al
   2091 ; AVX512BW-NEXT:    mulb %cl
   2092 ; AVX512BW-NEXT:    subb %al, %dl
   2093 ; AVX512BW-NEXT:    movzbl %dl, %eax
   2094 ; AVX512BW-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm0
   2095 ; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
   2096 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
   2097 ; AVX512BW-NEXT:    retq
   2098   %res = urem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   2099   ret <64 x i8> %res
   2100 }
   2101