; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
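; If codegen changes, regenerate the CHECK blocks by re-running utils/update_llc_test_checks.py on this file rather than editing them by hand.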

; fold (srem x, 1) -> 0
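; Any value modulo 1 is 0, so neither operand is inspected: the scalar test lowers to an xorl and the vector test to an xorps/vxorps zeroing idiom.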
define i32 @combine_srem_by_one(i32 %x) {
; CHECK-LABEL: combine_srem_by_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, 1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_one(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_one:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; fold (srem x, -1) -> 0
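; Returning 0 is valid for every input here as well: the one case where srem by -1 overflows (x == INT_MIN) is undefined, so the combine can still fold the whole expression to zero.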
define i32 @combine_srem_by_negone(i32 %x) {
; CHECK-LABEL: combine_srem_by_negone:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_negone(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

; TODO fold (srem x, INT_MIN)
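; The dedicated fold is still missing, but the divisor's magnitude is a power of two, so the current expansion already avoids a divide. Roughly, the pattern the CHECK lines below verify is:
;   %bias    = lshr (ashr %x, 31), 1              ; 0x7FFFFFFF for negative %x, 0 otherwise
;   %rounded = and (add %x, %bias), -2147483648
;   %rem     = add %x, %rounded                   ; adding INT_MIN == subtracting it in i32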
define i32 @combine_srem_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_srem_by_minsigned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    sarl $31, %eax
; CHECK-NEXT:    shrl %eax
; CHECK-NEXT:    addl %edi, %eax
; CHECK-NEXT:    andl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT:    leal (%rax,%rdi), %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -2147483648
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_minsigned:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_minsigned:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_minsigned:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ret <4 x i32> %1
}

; TODO fold (srem x, x) -> 0
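; Until that fold lands, the scalar test runs a full cltd/idivl and the vector test extracts, divides and re-inserts every lane. Folding to 0 would still be correct: x srem x is 0 for every nonzero x, and x == 0 is a division by zero, which is undefined.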
define i32 @combine_srem_dupe(i32 %x) {
; CHECK-LABEL: combine_srem_dupe:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %edi
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, %x
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_dupe(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_dupe:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrd $1, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    movl %edx, %ecx
; SSE-NEXT:    movd %xmm0, %esi
; SSE-NEXT:    movl %esi, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %esi
; SSE-NEXT:    movd %edx, %xmm1
; SSE-NEXT:    pinsrd $1, %ecx, %xmm1
; SSE-NEXT:    pextrd $2, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $2, %edx, %xmm1
; SSE-NEXT:    pextrd $3, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $3, %edx, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_dupe:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrd $1, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    movl %edx, %ecx
; AVX-NEXT:    vmovd %xmm0, %esi
; AVX-NEXT:    movl %esi, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %esi
; AVX-NEXT:    vmovd %edx, %xmm1
; AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $2, %edx, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $3, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $3, %edx, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, %x
  ret <4 x i32> %1
}

; fold (srem x, y) -> (urem x, y) iff x and y are positive
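; In both tests below the AND with 255 makes the dividend provably non-negative and the divisors are positive powers of two, so the signed remainder becomes an unsigned one and then a plain mask, e.g.
;   (and %x, 255) srem 4  ->  (and %x, 255) urem 4  ->  and %x, 3
; which is why each test compiles to a single and(ps) with a constant.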
define <4 x i32> @combine_vec_srem_by_pos0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pos0:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pos0:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [3,3,3,3]
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_srem_by_pos1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_pos1:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>
  ret <4 x i32> %2
}

; fold (srem x, (1 << c)) -> x - (x / (1 << c)) * (1 << c).
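; All of the power-of-two cases below lower without a divide. For the splat-of-4 case immediately below, the pattern is roughly:
;   %bias    = lshr (ashr %x, 31), 30     ; 3 for negative lanes, 0 otherwise
;   %rounded = and (add %x, %bias), -4    ; 4 * (x sdiv 4)
;   %rem     = sub %x, %rounded
; i.e. the psrad/psrld/paddd/pand/psubd sequences in the CHECK lines.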
define <4 x i32> @combine_vec_srem_by_pow2a(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967292,4294967292,4294967292,4294967292]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_srem_by_pow2a_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    psubd %xmm1, %xmm2
; SSE-NEXT:    pslld $2, %xmm2
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_pow2a_neg:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpsrad $2, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpslld $2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psrld $29, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT:    psrld $30, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $2, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT:    psrad $3, %xmm1
; SSE-NEXT:    psrad $1, %xmm2
; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm0[0,1],xmm3[2,3,4,5,6,7]
; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm3
; SSE-NEXT:    psubd %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $29, %xmm2, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpsrld $30, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
; AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT:    vpsllvd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrld $28, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrld $30, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrld $31, %xmm2
; SSE-NEXT:    psrld $29, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $4, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $2, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $3, %xmm2
; SSE-NEXT:    psrad $1, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $28, %xmm1, %xmm2
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm3
; AVX1-NEXT:    vpsrld $29, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $4, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2, i32 -4, i32 -8, i32 -16>
  ret <4 x i32> %1
}

; OSS-Fuzz #6883
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=6883
define i32 @ossfuzz6883() {
; CHECK-LABEL: ossfuzz6883:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl (%rax), %ecx
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %ecx
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movl $1, %edi
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %edi
; CHECK-NEXT:    movl %edx, %edi
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    divl %edi
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    retq
  %B17 = or i32 0, 2147483647
  %L6 = load i32, i32* undef
  %B11 = sdiv i32 %L6, %L6
  %B13 = udiv i32 %B17, %B17
  %B14 = srem i32 %B11, %B13
  %B16 = srem i32 %L6, %L6
  %B10 = udiv i32 %L6, %B14
  %B6 = and i32 %B16, %B10
  ret i32 %B6
}