; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2

; fold (srem x, 1) -> 0
define i32 @combine_srem_by_one(i32 %x) {
; CHECK-LABEL: combine_srem_by_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, 1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_one(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_one:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; fold (srem x, -1) -> 0
define i32 @combine_srem_by_negone(i32 %x) {
; CHECK-LABEL: combine_srem_by_negone:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_negone(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

; TODO fold (srem x, INT_MIN)
define i32 @combine_srem_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_srem_by_minsigned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    sarl $31, %eax
; CHECK-NEXT:    shrl %eax
; CHECK-NEXT:    addl %edi, %eax
; CHECK-NEXT:    andl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT:    leal (%rax,%rdi), %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -2147483648
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_minsigned:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_minsigned:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_minsigned:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ret <4 x i32> %1
}

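; Note: while the INT_MIN fold above is still TODO, the checks show the generic
; signed-remainder-by-power-of-two lowering with k = 31, roughly
;   srem x, INT_MIN  ==>  x - ((x + ((x >> 31) >>u 1)) & INT_MIN)
; The scalar version turns the final subtract into an add/lea because
; -0x80000000 == 0x80000000 in 32-bit two's complement.
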
; TODO fold (srem x, x) -> 0
define i32 @combine_srem_dupe(i32 %x) {
; CHECK-LABEL: combine_srem_dupe:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %edi
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, %x
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_dupe(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_dupe:
; SSE:       # %bb.0:
; SSE-NEXT:    pextrd $1, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    movl %edx, %ecx
; SSE-NEXT:    movd %xmm0, %esi
; SSE-NEXT:    movl %esi, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %esi
; SSE-NEXT:    movd %edx, %xmm1
; SSE-NEXT:    pinsrd $1, %ecx, %xmm1
; SSE-NEXT:    pextrd $2, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $2, %edx, %xmm1
; SSE-NEXT:    pextrd $3, %xmm0, %ecx
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    cltd
; SSE-NEXT:    idivl %ecx
; SSE-NEXT:    pinsrd $3, %edx, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_dupe:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrd $1, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    movl %edx, %ecx
; AVX-NEXT:    vmovd %xmm0, %esi
; AVX-NEXT:    movl %esi, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %esi
; AVX-NEXT:    vmovd %edx, %xmm1
; AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $2, %edx, %xmm1, %xmm1
; AVX-NEXT:    vpextrd $3, %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    cltd
; AVX-NEXT:    idivl %ecx
; AVX-NEXT:    vpinsrd $3, %edx, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, %x
  ret <4 x i32> %1
}

; fold (srem x, y) -> (urem x, y) iff x and y are positive
define <4 x i32> @combine_vec_srem_by_pos0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pos0:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pos0:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [3,3,3,3]
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_srem_by_pos1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_pos1:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>
  ret <4 x i32> %2
}

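; Note: the and with 255 above makes both operands provably non-negative, so the
; srem becomes a urem, and urem by a power of two is just a mask:
;   (x & 255) srem <4,4,4,4>   ==>  x & <3,3,3,3>
;   (x & 255) srem <1,4,8,16>  ==>  x & <0,3,7,15>
; which is the single AND in the checks above (on SSE/AVX1 the mask vector is a
; rip-relative constant-pool load, so its value is not spelled out in the
; assertions).
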
; fold (srem x, (1 << c)) -> x - (x / (1 << c)) * (1 << c).
define <4 x i32> @combine_vec_srem_by_pow2a(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967292,4294967292,4294967292,4294967292]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_srem_by_pow2a_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    psubd %xmm1, %xmm2
; SSE-NEXT:    pslld $2, %xmm2
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_pow2a_neg:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpsrad $2, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpslld $2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
  ret <4 x i32> %1
}

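; Note: for the uniform divisor 4 the checks above follow the usual bias-and-mask
; sequence for signed remainder by 2^k (here k = 2):
;   t = x + ((x >> 31) >>u 30)   ; adds 3 to negative lanes only
;   srem x, 4  ==>  x - (t & -4)
; The -4 variant instead forms the quotient q = t >> 2 (arithmetic), negates it,
; shifts it left by 2 and adds it to x, i.e. x - 4*(x sdiv 4); the remainder only
; depends on the divisor's magnitude, so the result matches srem by 4.
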
define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psrld $29, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT:    psrld $30, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $2, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT:    psrad $3, %xmm1
; SSE-NEXT:    psrad $1, %xmm2
; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm0[0,1],xmm3[2,3,4,5,6,7]
; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm3
; SSE-NEXT:    psubd %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $29, %xmm2, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpsrld $30, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
; AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT:    vpsllvd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
  ret <4 x i32> %1
}

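; Note: with non-uniform divisors <1,2,4,8> the same bias/shift scheme is applied
; per lane.  On AVX2 this uses variable shifts: the sign mask is logically shifted
; right by a per-lane amount to form the bias, the biased value is arithmetically
; shifted right by <0,1,2,3> to get the quotients (lane 0 is blended back from x,
; since dividing by 1 is a no-op), and the quotients are shifted left by the same
; amounts and subtracted from x.  SSE4.1/AVX1 lack variable shifts, so the
; per-lane shifts are emulated with blends of fixed-shift results and the quotient
; is multiplied back by the divisors via pmulld with a constant-pool vector.
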
define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrld $28, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrld $30, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrld $31, %xmm2
; SSE-NEXT:    psrld $29, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $4, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $2, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $3, %xmm2
; SSE-NEXT:    psrad $1, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $28, %xmm1, %xmm2
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm3
; AVX1-NEXT:    vpsrld $29, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $4, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2, i32 -4, i32 -8, i32 -16>
  ret <4 x i32> %1
}

; OSS-Fuzz #6883
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=6883
define i32 @ossfuzz6883() {
; CHECK-LABEL: ossfuzz6883:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl (%rax), %ecx
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %ecx
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movl $1, %edi
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %edi
; CHECK-NEXT:    movl %edx, %edi
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    divl %edi
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    retq
  %B17 = or i32 0, 2147483647
  %L6 = load i32, i32* undef
  %B11 = sdiv i32 %L6, %L6
  %B13 = udiv i32 %B17, %B17
  %B14 = srem i32 %B11, %B13
  %B16 = srem i32 %L6, %L6
  %B10 = udiv i32 %L6, %B14
  %B6 = and i32 %B16, %B10
  ret i32 %B6
}