; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2

; Each function below is a loop that loads two vectors, compares them with
; icmp, selects between the two loaded values using the compare result and
; stores the selection back through %ptr.a. The check lines after each body
; name the packed min/max instruction expected in the llc output for the
; CPU configurations listed in the run lines above.

; icmp slt + select on <16 x i8>; expects pminsb / vpminsb.
define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp slt <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test1:
; SSE4: pminsb

; AVX1: test1:
; AVX1: vpminsb

; AVX2: test1:
; AVX2: vpminsb
}

; icmp sle + select on <16 x i8>; expects pminsb / vpminsb.
define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp sle <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test2:
; SSE4: pminsb

; AVX1: test2:
; AVX1: vpminsb

; AVX2: test2:
; AVX2: vpminsb
}

; icmp sgt + select on <16 x i8>; expects pmaxsb / vpmaxsb.
define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp sgt <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test3:
; SSE4: pmaxsb

; AVX1: test3:
; AVX1: vpmaxsb

; AVX2: test3:
; AVX2: vpmaxsb
}

; icmp sge + select on <16 x i8>; expects pmaxsb / vpmaxsb.
define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp sge <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test4:
; SSE4: pmaxsb

; AVX1: test4:
; AVX1: vpmaxsb

; AVX2: test4:
; AVX2: vpmaxsb
}

; icmp ult + select on <16 x i8>; expects pminub / vpminub.
define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp ult <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test5:
; SSE2: pminub

; AVX1: test5:
; AVX1: vpminub

; AVX2: test5:
; AVX2: vpminub
}

; icmp ule + select on <16 x i8>; expects pminub / vpminub.
define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp ule <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test6:
; SSE2: pminub

; AVX1: test6:
; AVX1: vpminub

; AVX2: test6:
; AVX2: vpminub
}

; icmp ugt + select on <16 x i8>; expects pmaxub / vpmaxub.
define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp ugt <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test7:
; SSE2: pmaxub

; AVX1: test7:
; AVX1: vpmaxub

; AVX2: test7:
; AVX2: vpmaxub
}

; icmp uge + select on <16 x i8>; expects pmaxub / vpmaxub.
define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
  %load.a = load <16 x i8>* %ptr.a, align 2
  %load.b = load <16 x i8>* %ptr.b, align 2
  %cmp = icmp uge <16 x i8> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test8:
; SSE2: pmaxub

; AVX1: test8:
; AVX1: vpmaxub

; AVX2: test8:
; AVX2: vpmaxub
}

; icmp slt + select on <8 x i16>; expects pminsw / vpminsw.
define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp slt <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test9:
; SSE2: pminsw

; AVX1: test9:
; AVX1: vpminsw

; AVX2: test9:
; AVX2: vpminsw
}

; icmp sle + select on <8 x i16>; expects pminsw / vpminsw.
define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp sle <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test10:
; SSE2: pminsw

; AVX1: test10:
; AVX1: vpminsw

; AVX2: test10:
; AVX2: vpminsw
}

; icmp sgt + select on <8 x i16>; expects pmaxsw / vpmaxsw.
define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp sgt <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test11:
; SSE2: pmaxsw

; AVX1: test11:
; AVX1: vpmaxsw

; AVX2: test11:
; AVX2: vpmaxsw
}

; icmp sge + select on <8 x i16>; expects pmaxsw / vpmaxsw.
define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp sge <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: test12:
; SSE2: pmaxsw

; AVX1: test12:
; AVX1: vpmaxsw

; AVX2: test12:
; AVX2: vpmaxsw
}

; icmp ult + select on <8 x i16>; expects pminuw / vpminuw.
define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp ult <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test13:
; SSE4: pminuw

; AVX1: test13:
; AVX1: vpminuw

; AVX2: test13:
; AVX2: vpminuw
}

; icmp ule + select on <8 x i16>; expects pminuw / vpminuw.
define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp ule <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test14:
; SSE4: pminuw

; AVX1: test14:
; AVX1: vpminuw

; AVX2: test14:
; AVX2: vpminuw
}

; icmp ugt + select on <8 x i16>; expects pmaxuw / vpmaxuw.
define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp ugt <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test15:
; SSE4: pmaxuw

; AVX1: test15:
; AVX1: vpmaxuw

; AVX2: test15:
; AVX2: vpmaxuw
}

; icmp uge + select on <8 x i16>; expects pmaxuw / vpmaxuw.
define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp uge <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test16:
; SSE4: pmaxuw

; AVX1: test16:
; AVX1: vpmaxuw

; AVX2: test16:
; AVX2: vpmaxuw
}

; icmp slt + select on <4 x i32>; expects pminsd / vpminsd.
define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp slt <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test17:
; SSE4: pminsd

; AVX1: test17:
; AVX1: vpminsd

; AVX2: test17:
; AVX2: vpminsd
}

; icmp sle + select on <4 x i32>; expects pminsd / vpminsd.
define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp sle <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test18:
; SSE4: pminsd

; AVX1: test18:
; AVX1: vpminsd

; AVX2: test18:
; AVX2: vpminsd
}

; icmp sgt + select on <4 x i32>; expects pmaxsd / vpmaxsd.
define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp sgt <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test19:
; SSE4: pmaxsd

; AVX1: test19:
; AVX1: vpmaxsd

; AVX2: test19:
; AVX2: vpmaxsd
}

; icmp sge + select on <4 x i32>; expects pmaxsd / vpmaxsd.
define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp sge <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test20:
; SSE4: pmaxsd

; AVX1: test20:
; AVX1: vpmaxsd

; AVX2: test20:
; AVX2: vpmaxsd
}

; icmp ult + select on <4 x i32>; expects pminud / vpminud.
define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp ult <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test21:
; SSE4: pminud

; AVX1: test21:
; AVX1: vpminud

; AVX2: test21:
; AVX2: vpminud
}

; icmp ule + select on <4 x i32>; expects pminud / vpminud.
define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp ule <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test22:
; SSE4: pminud

; AVX1: test22:
; AVX1: vpminud

; AVX2: test22:
; AVX2: vpminud
}

; icmp ugt + select on <4 x i32>; expects pmaxud / vpmaxud.
define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp ugt <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test23:
; SSE4: pmaxud

; AVX1: test23:
; AVX1: vpmaxud

; AVX2: test23:
; AVX2: vpmaxud
}

; icmp uge + select on <4 x i32>; expects pmaxud / vpmaxud.
define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp uge <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4: test24:
; SSE4: pmaxud

; AVX1: test24:
; AVX1: vpmaxud

; AVX2: test24:
; AVX2: vpmaxud
}

; icmp slt + select on <32 x i8>; expects vpminsb (AVX2 run only).
define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp slt <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test25:
; AVX2: vpminsb
}

; icmp sle + select on <32 x i8>; expects vpminsb (AVX2 run only).
define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp sle <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test26:
; AVX2: vpminsb
}

; icmp sgt + select on <32 x i8>; expects vpmaxsb (AVX2 run only).
define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp sgt <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test27:
; AVX2: vpmaxsb
}

; icmp sge + select on <32 x i8>; expects vpmaxsb (AVX2 run only).
define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp sge <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test28:
; AVX2: vpmaxsb
}

; icmp ult + select on <32 x i8>; expects vpminub (AVX2 run only).
define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp ult <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test29:
; AVX2: vpminub
}

; icmp ule + select on <32 x i8>; expects vpminub (AVX2 run only).
define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp ule <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test30:
; AVX2: vpminub
}

; icmp ugt + select on <32 x i8>; expects vpmaxub (AVX2 run only).
define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp ugt <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test31:
; AVX2: vpmaxub
}

; icmp uge + select on <32 x i8>; expects vpmaxub (AVX2 run only).
define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp uge <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test32:
; AVX2: vpmaxub
}

; icmp slt + select on <16 x i16>; expects vpminsw (AVX2 run only).
define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp slt <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test33:
; AVX2: vpminsw
}

; icmp sle + select on <16 x i16>; expects vpminsw (AVX2 run only).
define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp sle <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test34:
; AVX2: vpminsw
}

; icmp sgt + select on <16 x i16>; expects vpmaxsw (AVX2 run only).
define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp sgt <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: test35:
; AVX2: vpmaxsw
}

define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
1071 %load.b = load <16 x i16>* %ptr.b, align 2 1072 %cmp = icmp sge <16 x i16> %load.a, %load.b 1073 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1074 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1075 %index.next = add i64 %index, 16 1076 %loop = icmp eq i64 %index.next, 16384 1077 br i1 %loop, label %for.end, label %vector.body 1078 1079 for.end: ; preds = %vector.body 1080 ret void 1081 1082 ; AVX2: test36: 1083 ; AVX2: vpmaxsw 1084 } 1085 1086 define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind { 1087 vector.ph: 1088 br label %vector.body 1089 1090 vector.body: ; preds = %vector.body, %vector.ph 1091 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1092 %gep.a = getelementptr inbounds i16* %a, i64 %index 1093 %gep.b = getelementptr inbounds i16* %b, i64 %index 1094 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 1095 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 1096 %load.a = load <16 x i16>* %ptr.a, align 2 1097 %load.b = load <16 x i16>* %ptr.b, align 2 1098 %cmp = icmp ult <16 x i16> %load.a, %load.b 1099 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1100 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1101 %index.next = add i64 %index, 16 1102 %loop = icmp eq i64 %index.next, 16384 1103 br i1 %loop, label %for.end, label %vector.body 1104 1105 for.end: ; preds = %vector.body 1106 ret void 1107 1108 ; AVX2: test37: 1109 ; AVX2: vpminuw 1110 } 1111 1112 define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind { 1113 vector.ph: 1114 br label %vector.body 1115 1116 vector.body: ; preds = %vector.body, %vector.ph 1117 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1118 %gep.a = getelementptr inbounds i16* %a, i64 %index 1119 %gep.b = getelementptr inbounds i16* %b, i64 %index 1120 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 1121 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 1122 %load.a = load <16 x i16>* %ptr.a, align 2 1123 %load.b = load <16 x i16>* 
%ptr.b, align 2 1124 %cmp = icmp ule <16 x i16> %load.a, %load.b 1125 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1126 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1127 %index.next = add i64 %index, 16 1128 %loop = icmp eq i64 %index.next, 16384 1129 br i1 %loop, label %for.end, label %vector.body 1130 1131 for.end: ; preds = %vector.body 1132 ret void 1133 1134 ; AVX2: test38: 1135 ; AVX2: vpminuw 1136 } 1137 1138 define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind { 1139 vector.ph: 1140 br label %vector.body 1141 1142 vector.body: ; preds = %vector.body, %vector.ph 1143 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1144 %gep.a = getelementptr inbounds i16* %a, i64 %index 1145 %gep.b = getelementptr inbounds i16* %b, i64 %index 1146 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 1147 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 1148 %load.a = load <16 x i16>* %ptr.a, align 2 1149 %load.b = load <16 x i16>* %ptr.b, align 2 1150 %cmp = icmp ugt <16 x i16> %load.a, %load.b 1151 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1152 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1153 %index.next = add i64 %index, 16 1154 %loop = icmp eq i64 %index.next, 16384 1155 br i1 %loop, label %for.end, label %vector.body 1156 1157 for.end: ; preds = %vector.body 1158 ret void 1159 1160 ; AVX2: test39: 1161 ; AVX2: vpmaxuw 1162 } 1163 1164 define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind { 1165 vector.ph: 1166 br label %vector.body 1167 1168 vector.body: ; preds = %vector.body, %vector.ph 1169 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1170 %gep.a = getelementptr inbounds i16* %a, i64 %index 1171 %gep.b = getelementptr inbounds i16* %b, i64 %index 1172 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 1173 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 1174 %load.a = load <16 x i16>* %ptr.a, align 2 1175 %load.b = load <16 x i16>* %ptr.b, align 2 1176 %cmp = icmp 
uge <16 x i16> %load.a, %load.b 1177 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1178 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1179 %index.next = add i64 %index, 16 1180 %loop = icmp eq i64 %index.next, 16384 1181 br i1 %loop, label %for.end, label %vector.body 1182 1183 for.end: ; preds = %vector.body 1184 ret void 1185 1186 ; AVX2: test40: 1187 ; AVX2: vpmaxuw 1188 } 1189 1190 define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind { 1191 vector.ph: 1192 br label %vector.body 1193 1194 vector.body: ; preds = %vector.body, %vector.ph 1195 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1196 %gep.a = getelementptr inbounds i32* %a, i64 %index 1197 %gep.b = getelementptr inbounds i32* %b, i64 %index 1198 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1199 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1200 %load.a = load <8 x i32>* %ptr.a, align 2 1201 %load.b = load <8 x i32>* %ptr.b, align 2 1202 %cmp = icmp slt <8 x i32> %load.a, %load.b 1203 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1204 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1205 %index.next = add i64 %index, 8 1206 %loop = icmp eq i64 %index.next, 16384 1207 br i1 %loop, label %for.end, label %vector.body 1208 1209 for.end: ; preds = %vector.body 1210 ret void 1211 1212 ; AVX2: test41: 1213 ; AVX2: vpminsd 1214 } 1215 1216 define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind { 1217 vector.ph: 1218 br label %vector.body 1219 1220 vector.body: ; preds = %vector.body, %vector.ph 1221 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1222 %gep.a = getelementptr inbounds i32* %a, i64 %index 1223 %gep.b = getelementptr inbounds i32* %b, i64 %index 1224 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1225 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1226 %load.a = load <8 x i32>* %ptr.a, align 2 1227 %load.b = load <8 x i32>* %ptr.b, align 2 1228 %cmp = icmp sle <8 x i32> %load.a, %load.b 1229 %sel = 
select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1230 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1231 %index.next = add i64 %index, 8 1232 %loop = icmp eq i64 %index.next, 16384 1233 br i1 %loop, label %for.end, label %vector.body 1234 1235 for.end: ; preds = %vector.body 1236 ret void 1237 1238 ; AVX2: test42: 1239 ; AVX2: vpminsd 1240 } 1241 1242 define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind { 1243 vector.ph: 1244 br label %vector.body 1245 1246 vector.body: ; preds = %vector.body, %vector.ph 1247 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1248 %gep.a = getelementptr inbounds i32* %a, i64 %index 1249 %gep.b = getelementptr inbounds i32* %b, i64 %index 1250 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1251 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1252 %load.a = load <8 x i32>* %ptr.a, align 2 1253 %load.b = load <8 x i32>* %ptr.b, align 2 1254 %cmp = icmp sgt <8 x i32> %load.a, %load.b 1255 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1256 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1257 %index.next = add i64 %index, 8 1258 %loop = icmp eq i64 %index.next, 16384 1259 br i1 %loop, label %for.end, label %vector.body 1260 1261 for.end: ; preds = %vector.body 1262 ret void 1263 1264 ; AVX2: test43: 1265 ; AVX2: vpmaxsd 1266 } 1267 1268 define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind { 1269 vector.ph: 1270 br label %vector.body 1271 1272 vector.body: ; preds = %vector.body, %vector.ph 1273 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1274 %gep.a = getelementptr inbounds i32* %a, i64 %index 1275 %gep.b = getelementptr inbounds i32* %b, i64 %index 1276 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1277 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1278 %load.a = load <8 x i32>* %ptr.a, align 2 1279 %load.b = load <8 x i32>* %ptr.b, align 2 1280 %cmp = icmp sge <8 x i32> %load.a, %load.b 1281 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> 
%load.b 1282 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1283 %index.next = add i64 %index, 8 1284 %loop = icmp eq i64 %index.next, 16384 1285 br i1 %loop, label %for.end, label %vector.body 1286 1287 for.end: ; preds = %vector.body 1288 ret void 1289 1290 ; AVX2: test44: 1291 ; AVX2: vpmaxsd 1292 } 1293 1294 define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind { 1295 vector.ph: 1296 br label %vector.body 1297 1298 vector.body: ; preds = %vector.body, %vector.ph 1299 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1300 %gep.a = getelementptr inbounds i32* %a, i64 %index 1301 %gep.b = getelementptr inbounds i32* %b, i64 %index 1302 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1303 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1304 %load.a = load <8 x i32>* %ptr.a, align 2 1305 %load.b = load <8 x i32>* %ptr.b, align 2 1306 %cmp = icmp ult <8 x i32> %load.a, %load.b 1307 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1308 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1309 %index.next = add i64 %index, 8 1310 %loop = icmp eq i64 %index.next, 16384 1311 br i1 %loop, label %for.end, label %vector.body 1312 1313 for.end: ; preds = %vector.body 1314 ret void 1315 1316 ; AVX2: test45: 1317 ; AVX2: vpminud 1318 } 1319 1320 define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind { 1321 vector.ph: 1322 br label %vector.body 1323 1324 vector.body: ; preds = %vector.body, %vector.ph 1325 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1326 %gep.a = getelementptr inbounds i32* %a, i64 %index 1327 %gep.b = getelementptr inbounds i32* %b, i64 %index 1328 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1329 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1330 %load.a = load <8 x i32>* %ptr.a, align 2 1331 %load.b = load <8 x i32>* %ptr.b, align 2 1332 %cmp = icmp ule <8 x i32> %load.a, %load.b 1333 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1334 store <8 x i32> %sel, <8 x i32>* %ptr.a, 
align 2 1335 %index.next = add i64 %index, 8 1336 %loop = icmp eq i64 %index.next, 16384 1337 br i1 %loop, label %for.end, label %vector.body 1338 1339 for.end: ; preds = %vector.body 1340 ret void 1341 1342 ; AVX2: test46: 1343 ; AVX2: vpminud 1344 } 1345 1346 define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind { 1347 vector.ph: 1348 br label %vector.body 1349 1350 vector.body: ; preds = %vector.body, %vector.ph 1351 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1352 %gep.a = getelementptr inbounds i32* %a, i64 %index 1353 %gep.b = getelementptr inbounds i32* %b, i64 %index 1354 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1355 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1356 %load.a = load <8 x i32>* %ptr.a, align 2 1357 %load.b = load <8 x i32>* %ptr.b, align 2 1358 %cmp = icmp ugt <8 x i32> %load.a, %load.b 1359 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1360 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1361 %index.next = add i64 %index, 8 1362 %loop = icmp eq i64 %index.next, 16384 1363 br i1 %loop, label %for.end, label %vector.body 1364 1365 for.end: ; preds = %vector.body 1366 ret void 1367 1368 ; AVX2: test47: 1369 ; AVX2: vpmaxud 1370 } 1371 1372 define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind { 1373 vector.ph: 1374 br label %vector.body 1375 1376 vector.body: ; preds = %vector.body, %vector.ph 1377 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1378 %gep.a = getelementptr inbounds i32* %a, i64 %index 1379 %gep.b = getelementptr inbounds i32* %b, i64 %index 1380 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1381 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1382 %load.a = load <8 x i32>* %ptr.a, align 2 1383 %load.b = load <8 x i32>* %ptr.b, align 2 1384 %cmp = icmp uge <8 x i32> %load.a, %load.b 1385 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1386 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1387 %index.next = add i64 %index, 8 1388 %loop 
= icmp eq i64 %index.next, 16384 1389 br i1 %loop, label %for.end, label %vector.body 1390 1391 for.end: ; preds = %vector.body 1392 ret void 1393 1394 ; AVX2: test48: 1395 ; AVX2: vpmaxud 1396 } 1397 1398 define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind { 1399 vector.ph: 1400 br label %vector.body 1401 1402 vector.body: ; preds = %vector.body, %vector.ph 1403 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1404 %gep.a = getelementptr inbounds i8* %a, i64 %index 1405 %gep.b = getelementptr inbounds i8* %b, i64 %index 1406 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1407 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1408 %load.a = load <16 x i8>* %ptr.a, align 2 1409 %load.b = load <16 x i8>* %ptr.b, align 2 1410 %cmp = icmp slt <16 x i8> %load.a, %load.b 1411 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1412 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1413 %index.next = add i64 %index, 16 1414 %loop = icmp eq i64 %index.next, 16384 1415 br i1 %loop, label %for.end, label %vector.body 1416 1417 for.end: ; preds = %vector.body 1418 ret void 1419 1420 ; SSE4: test49: 1421 ; SSE4: pmaxsb 1422 1423 ; AVX1: test49: 1424 ; AVX1: vpmaxsb 1425 1426 ; AVX2: test49: 1427 ; AVX2: vpmaxsb 1428 } 1429 1430 define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind { 1431 vector.ph: 1432 br label %vector.body 1433 1434 vector.body: ; preds = %vector.body, %vector.ph 1435 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1436 %gep.a = getelementptr inbounds i8* %a, i64 %index 1437 %gep.b = getelementptr inbounds i8* %b, i64 %index 1438 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1439 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1440 %load.a = load <16 x i8>* %ptr.a, align 2 1441 %load.b = load <16 x i8>* %ptr.b, align 2 1442 %cmp = icmp sle <16 x i8> %load.a, %load.b 1443 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1444 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1445 %index.next = 
add i64 %index, 16 1446 %loop = icmp eq i64 %index.next, 16384 1447 br i1 %loop, label %for.end, label %vector.body 1448 1449 for.end: ; preds = %vector.body 1450 ret void 1451 1452 ; SSE4: test50: 1453 ; SSE4: pmaxsb 1454 1455 ; AVX1: test50: 1456 ; AVX1: vpmaxsb 1457 1458 ; AVX2: test50: 1459 ; AVX2: vpmaxsb 1460 } 1461 1462 define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind { 1463 vector.ph: 1464 br label %vector.body 1465 1466 vector.body: ; preds = %vector.body, %vector.ph 1467 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1468 %gep.a = getelementptr inbounds i8* %a, i64 %index 1469 %gep.b = getelementptr inbounds i8* %b, i64 %index 1470 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1471 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1472 %load.a = load <16 x i8>* %ptr.a, align 2 1473 %load.b = load <16 x i8>* %ptr.b, align 2 1474 %cmp = icmp sgt <16 x i8> %load.a, %load.b 1475 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1476 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1477 %index.next = add i64 %index, 16 1478 %loop = icmp eq i64 %index.next, 16384 1479 br i1 %loop, label %for.end, label %vector.body 1480 1481 for.end: ; preds = %vector.body 1482 ret void 1483 1484 ; SSE4: test51: 1485 ; SSE4: pminsb 1486 1487 ; AVX1: test51: 1488 ; AVX1: vpminsb 1489 1490 ; AVX2: test51: 1491 ; AVX2: vpminsb 1492 } 1493 1494 define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind { 1495 vector.ph: 1496 br label %vector.body 1497 1498 vector.body: ; preds = %vector.body, %vector.ph 1499 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1500 %gep.a = getelementptr inbounds i8* %a, i64 %index 1501 %gep.b = getelementptr inbounds i8* %b, i64 %index 1502 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1503 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1504 %load.a = load <16 x i8>* %ptr.a, align 2 1505 %load.b = load <16 x i8>* %ptr.b, align 2 1506 %cmp = icmp sge <16 x i8> %load.a, %load.b 1507 %sel = select <16 
x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1508 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1509 %index.next = add i64 %index, 16 1510 %loop = icmp eq i64 %index.next, 16384 1511 br i1 %loop, label %for.end, label %vector.body 1512 1513 for.end: ; preds = %vector.body 1514 ret void 1515 1516 ; SSE4: test52: 1517 ; SSE4: pminsb 1518 1519 ; AVX1: test52: 1520 ; AVX1: vpminsb 1521 1522 ; AVX2: test52: 1523 ; AVX2: vpminsb 1524 } 1525 1526 define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind { 1527 vector.ph: 1528 br label %vector.body 1529 1530 vector.body: ; preds = %vector.body, %vector.ph 1531 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1532 %gep.a = getelementptr inbounds i8* %a, i64 %index 1533 %gep.b = getelementptr inbounds i8* %b, i64 %index 1534 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1535 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1536 %load.a = load <16 x i8>* %ptr.a, align 2 1537 %load.b = load <16 x i8>* %ptr.b, align 2 1538 %cmp = icmp ult <16 x i8> %load.a, %load.b 1539 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1540 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1541 %index.next = add i64 %index, 16 1542 %loop = icmp eq i64 %index.next, 16384 1543 br i1 %loop, label %for.end, label %vector.body 1544 1545 for.end: ; preds = %vector.body 1546 ret void 1547 1548 ; SSE2: test53: 1549 ; SSE2: pmaxub 1550 1551 ; AVX1: test53: 1552 ; AVX1: vpmaxub 1553 1554 ; AVX2: test53: 1555 ; AVX2: vpmaxub 1556 } 1557 1558 define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind { 1559 vector.ph: 1560 br label %vector.body 1561 1562 vector.body: ; preds = %vector.body, %vector.ph 1563 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1564 %gep.a = getelementptr inbounds i8* %a, i64 %index 1565 %gep.b = getelementptr inbounds i8* %b, i64 %index 1566 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1567 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1568 %load.a = load <16 x i8>* %ptr.a, 
align 2 1569 %load.b = load <16 x i8>* %ptr.b, align 2 1570 %cmp = icmp ule <16 x i8> %load.a, %load.b 1571 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1572 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1573 %index.next = add i64 %index, 16 1574 %loop = icmp eq i64 %index.next, 16384 1575 br i1 %loop, label %for.end, label %vector.body 1576 1577 for.end: ; preds = %vector.body 1578 ret void 1579 1580 ; SSE2: test54: 1581 ; SSE2: pmaxub 1582 1583 ; AVX1: test54: 1584 ; AVX1: vpmaxub 1585 1586 ; AVX2: test54: 1587 ; AVX2: vpmaxub 1588 } 1589 1590 define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind { 1591 vector.ph: 1592 br label %vector.body 1593 1594 vector.body: ; preds = %vector.body, %vector.ph 1595 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1596 %gep.a = getelementptr inbounds i8* %a, i64 %index 1597 %gep.b = getelementptr inbounds i8* %b, i64 %index 1598 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1599 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1600 %load.a = load <16 x i8>* %ptr.a, align 2 1601 %load.b = load <16 x i8>* %ptr.b, align 2 1602 %cmp = icmp ugt <16 x i8> %load.a, %load.b 1603 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1604 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1605 %index.next = add i64 %index, 16 1606 %loop = icmp eq i64 %index.next, 16384 1607 br i1 %loop, label %for.end, label %vector.body 1608 1609 for.end: ; preds = %vector.body 1610 ret void 1611 1612 ; SSE2: test55: 1613 ; SSE2: pminub 1614 1615 ; AVX1: test55: 1616 ; AVX1: vpminub 1617 1618 ; AVX2: test55: 1619 ; AVX2: vpminub 1620 } 1621 1622 define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind { 1623 vector.ph: 1624 br label %vector.body 1625 1626 vector.body: ; preds = %vector.body, %vector.ph 1627 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1628 %gep.a = getelementptr inbounds i8* %a, i64 %index 1629 %gep.b = getelementptr inbounds i8* %b, i64 %index 1630 
%ptr.a = bitcast i8* %gep.a to <16 x i8>* 1631 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1632 %load.a = load <16 x i8>* %ptr.a, align 2 1633 %load.b = load <16 x i8>* %ptr.b, align 2 1634 %cmp = icmp uge <16 x i8> %load.a, %load.b 1635 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1636 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1637 %index.next = add i64 %index, 16 1638 %loop = icmp eq i64 %index.next, 16384 1639 br i1 %loop, label %for.end, label %vector.body 1640 1641 for.end: ; preds = %vector.body 1642 ret void 1643 1644 ; SSE2: test56: 1645 ; SSE2: pminub 1646 1647 ; AVX1: test56: 1648 ; AVX1: vpminub 1649 1650 ; AVX2: test56: 1651 ; AVX2: vpminub 1652 } 1653 1654 define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind { 1655 vector.ph: 1656 br label %vector.body 1657 1658 vector.body: ; preds = %vector.body, %vector.ph 1659 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1660 %gep.a = getelementptr inbounds i16* %a, i64 %index 1661 %gep.b = getelementptr inbounds i16* %b, i64 %index 1662 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1663 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1664 %load.a = load <8 x i16>* %ptr.a, align 2 1665 %load.b = load <8 x i16>* %ptr.b, align 2 1666 %cmp = icmp slt <8 x i16> %load.a, %load.b 1667 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1668 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1669 %index.next = add i64 %index, 8 1670 %loop = icmp eq i64 %index.next, 16384 1671 br i1 %loop, label %for.end, label %vector.body 1672 1673 for.end: ; preds = %vector.body 1674 ret void 1675 1676 ; SSE2: test57: 1677 ; SSE2: pmaxsw 1678 1679 ; AVX1: test57: 1680 ; AVX1: vpmaxsw 1681 1682 ; AVX2: test57: 1683 ; AVX2: vpmaxsw 1684 } 1685 1686 define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind { 1687 vector.ph: 1688 br label %vector.body 1689 1690 vector.body: ; preds = %vector.body, %vector.ph 1691 %index = phi i64 [ 0, %vector.ph ], [ %index.next, 
%vector.body ] 1692 %gep.a = getelementptr inbounds i16* %a, i64 %index 1693 %gep.b = getelementptr inbounds i16* %b, i64 %index 1694 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1695 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1696 %load.a = load <8 x i16>* %ptr.a, align 2 1697 %load.b = load <8 x i16>* %ptr.b, align 2 1698 %cmp = icmp sle <8 x i16> %load.a, %load.b 1699 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1700 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1701 %index.next = add i64 %index, 8 1702 %loop = icmp eq i64 %index.next, 16384 1703 br i1 %loop, label %for.end, label %vector.body 1704 1705 for.end: ; preds = %vector.body 1706 ret void 1707 1708 ; SSE2: test58: 1709 ; SSE2: pmaxsw 1710 1711 ; AVX1: test58: 1712 ; AVX1: vpmaxsw 1713 1714 ; AVX2: test58: 1715 ; AVX2: vpmaxsw 1716 } 1717 1718 define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind { 1719 vector.ph: 1720 br label %vector.body 1721 1722 vector.body: ; preds = %vector.body, %vector.ph 1723 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1724 %gep.a = getelementptr inbounds i16* %a, i64 %index 1725 %gep.b = getelementptr inbounds i16* %b, i64 %index 1726 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1727 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1728 %load.a = load <8 x i16>* %ptr.a, align 2 1729 %load.b = load <8 x i16>* %ptr.b, align 2 1730 %cmp = icmp sgt <8 x i16> %load.a, %load.b 1731 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1732 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1733 %index.next = add i64 %index, 8 1734 %loop = icmp eq i64 %index.next, 16384 1735 br i1 %loop, label %for.end, label %vector.body 1736 1737 for.end: ; preds = %vector.body 1738 ret void 1739 1740 ; SSE2: test59: 1741 ; SSE2: pminsw 1742 1743 ; AVX1: test59: 1744 ; AVX1: vpminsw 1745 1746 ; AVX2: test59: 1747 ; AVX2: vpminsw 1748 } 1749 1750 define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind { 1751 vector.ph: 1752 br 
label %vector.body 1753 1754 vector.body: ; preds = %vector.body, %vector.ph 1755 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1756 %gep.a = getelementptr inbounds i16* %a, i64 %index 1757 %gep.b = getelementptr inbounds i16* %b, i64 %index 1758 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1759 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1760 %load.a = load <8 x i16>* %ptr.a, align 2 1761 %load.b = load <8 x i16>* %ptr.b, align 2 1762 %cmp = icmp sge <8 x i16> %load.a, %load.b 1763 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1764 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1765 %index.next = add i64 %index, 8 1766 %loop = icmp eq i64 %index.next, 16384 1767 br i1 %loop, label %for.end, label %vector.body 1768 1769 for.end: ; preds = %vector.body 1770 ret void 1771 1772 ; SSE2: test60: 1773 ; SSE2: pminsw 1774 1775 ; AVX1: test60: 1776 ; AVX1: vpminsw 1777 1778 ; AVX2: test60: 1779 ; AVX2: vpminsw 1780 } 1781 1782 define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind { 1783 vector.ph: 1784 br label %vector.body 1785 1786 vector.body: ; preds = %vector.body, %vector.ph 1787 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1788 %gep.a = getelementptr inbounds i16* %a, i64 %index 1789 %gep.b = getelementptr inbounds i16* %b, i64 %index 1790 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1791 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1792 %load.a = load <8 x i16>* %ptr.a, align 2 1793 %load.b = load <8 x i16>* %ptr.b, align 2 1794 %cmp = icmp ult <8 x i16> %load.a, %load.b 1795 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1796 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1797 %index.next = add i64 %index, 8 1798 %loop = icmp eq i64 %index.next, 16384 1799 br i1 %loop, label %for.end, label %vector.body 1800 1801 for.end: ; preds = %vector.body 1802 ret void 1803 1804 ; SSE4: test61: 1805 ; SSE4: pmaxuw 1806 1807 ; AVX1: test61: 1808 ; AVX1: vpmaxuw 1809 1810 ; AVX2: test61: 
1811 ; AVX2: vpmaxuw 1812 } 1813 1814 define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind { 1815 vector.ph: 1816 br label %vector.body 1817 1818 vector.body: ; preds = %vector.body, %vector.ph 1819 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1820 %gep.a = getelementptr inbounds i16* %a, i64 %index 1821 %gep.b = getelementptr inbounds i16* %b, i64 %index 1822 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1823 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1824 %load.a = load <8 x i16>* %ptr.a, align 2 1825 %load.b = load <8 x i16>* %ptr.b, align 2 1826 %cmp = icmp ule <8 x i16> %load.a, %load.b 1827 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1828 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1829 %index.next = add i64 %index, 8 1830 %loop = icmp eq i64 %index.next, 16384 1831 br i1 %loop, label %for.end, label %vector.body 1832 1833 for.end: ; preds = %vector.body 1834 ret void 1835 1836 ; SSE4: test62: 1837 ; SSE4: pmaxuw 1838 1839 ; AVX1: test62: 1840 ; AVX1: vpmaxuw 1841 1842 ; AVX2: test62: 1843 ; AVX2: vpmaxuw 1844 } 1845 1846 define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind { 1847 vector.ph: 1848 br label %vector.body 1849 1850 vector.body: ; preds = %vector.body, %vector.ph 1851 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1852 %gep.a = getelementptr inbounds i16* %a, i64 %index 1853 %gep.b = getelementptr inbounds i16* %b, i64 %index 1854 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1855 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1856 %load.a = load <8 x i16>* %ptr.a, align 2 1857 %load.b = load <8 x i16>* %ptr.b, align 2 1858 %cmp = icmp ugt <8 x i16> %load.a, %load.b 1859 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1860 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1861 %index.next = add i64 %index, 8 1862 %loop = icmp eq i64 %index.next, 16384 1863 br i1 %loop, label %for.end, label %vector.body 1864 1865 for.end: ; preds = %vector.body 
1866 ret void 1867 1868 ; SSE4: test63: 1869 ; SSE4: pminuw 1870 1871 ; AVX1: test63: 1872 ; AVX1: vpminuw 1873 1874 ; AVX2: test63: 1875 ; AVX2: vpminuw 1876 } 1877 1878 define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind { 1879 vector.ph: 1880 br label %vector.body 1881 1882 vector.body: ; preds = %vector.body, %vector.ph 1883 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1884 %gep.a = getelementptr inbounds i16* %a, i64 %index 1885 %gep.b = getelementptr inbounds i16* %b, i64 %index 1886 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1887 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1888 %load.a = load <8 x i16>* %ptr.a, align 2 1889 %load.b = load <8 x i16>* %ptr.b, align 2 1890 %cmp = icmp uge <8 x i16> %load.a, %load.b 1891 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1892 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1893 %index.next = add i64 %index, 8 1894 %loop = icmp eq i64 %index.next, 16384 1895 br i1 %loop, label %for.end, label %vector.body 1896 1897 for.end: ; preds = %vector.body 1898 ret void 1899 1900 ; SSE4: test64: 1901 ; SSE4: pminuw 1902 1903 ; AVX1: test64: 1904 ; AVX1: vpminuw 1905 1906 ; AVX2: test64: 1907 ; AVX2: vpminuw 1908 } 1909 1910 define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind { 1911 vector.ph: 1912 br label %vector.body 1913 1914 vector.body: ; preds = %vector.body, %vector.ph 1915 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1916 %gep.a = getelementptr inbounds i32* %a, i64 %index 1917 %gep.b = getelementptr inbounds i32* %b, i64 %index 1918 %ptr.a = bitcast i32* %gep.a to <4 x i32>* 1919 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 1920 %load.a = load <4 x i32>* %ptr.a, align 2 1921 %load.b = load <4 x i32>* %ptr.b, align 2 1922 %cmp = icmp slt <4 x i32> %load.a, %load.b 1923 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a 1924 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 1925 %index.next = add i64 %index, 4 1926 
%loop = icmp eq i64 %index.next, 16384 1927 br i1 %loop, label %for.end, label %vector.body 1928 1929 for.end: ; preds = %vector.body 1930 ret void 1931 1932 ; SSE4: test65: 1933 ; SSE4: pmaxsd 1934 1935 ; AVX1: test65: 1936 ; AVX1: vpmaxsd 1937 1938 ; AVX2: test65: 1939 ; AVX2: vpmaxsd 1940 } 1941 1942 define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind { 1943 vector.ph: 1944 br label %vector.body 1945 1946 vector.body: ; preds = %vector.body, %vector.ph 1947 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1948 %gep.a = getelementptr inbounds i32* %a, i64 %index 1949 %gep.b = getelementptr inbounds i32* %b, i64 %index 1950 %ptr.a = bitcast i32* %gep.a to <4 x i32>* 1951 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 1952 %load.a = load <4 x i32>* %ptr.a, align 2 1953 %load.b = load <4 x i32>* %ptr.b, align 2 1954 %cmp = icmp sle <4 x i32> %load.a, %load.b 1955 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a 1956 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 1957 %index.next = add i64 %index, 4 1958 %loop = icmp eq i64 %index.next, 16384 1959 br i1 %loop, label %for.end, label %vector.body 1960 1961 for.end: ; preds = %vector.body 1962 ret void 1963 1964 ; SSE4: test66: 1965 ; SSE4: pmaxsd 1966 1967 ; AVX1: test66: 1968 ; AVX1: vpmaxsd 1969 1970 ; AVX2: test66: 1971 ; AVX2: vpmaxsd 1972 } 1973 1974 define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind { 1975 vector.ph: 1976 br label %vector.body 1977 1978 vector.body: ; preds = %vector.body, %vector.ph 1979 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1980 %gep.a = getelementptr inbounds i32* %a, i64 %index 1981 %gep.b = getelementptr inbounds i32* %b, i64 %index 1982 %ptr.a = bitcast i32* %gep.a to <4 x i32>* 1983 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 1984 %load.a = load <4 x i32>* %ptr.a, align 2 1985 %load.b = load <4 x i32>* %ptr.b, align 2 1986 %cmp = icmp sgt <4 x i32> %load.a, %load.b 1987 %sel = select <4 x i1> %cmp, <4 
x i32> %load.b, <4 x i32> %load.a 1988 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 1989 %index.next = add i64 %index, 4 1990 %loop = icmp eq i64 %index.next, 16384 1991 br i1 %loop, label %for.end, label %vector.body 1992 1993 for.end: ; preds = %vector.body 1994 ret void 1995 1996 ; SSE4: test67: 1997 ; SSE4: pminsd 1998 1999 ; AVX1: test67: 2000 ; AVX1: vpminsd 2001 2002 ; AVX2: test67: 2003 ; AVX2: vpminsd 2004 } 2005 2006 define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind { 2007 vector.ph: 2008 br label %vector.body 2009 2010 vector.body: ; preds = %vector.body, %vector.ph 2011 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2012 %gep.a = getelementptr inbounds i32* %a, i64 %index 2013 %gep.b = getelementptr inbounds i32* %b, i64 %index 2014 %ptr.a = bitcast i32* %gep.a to <4 x i32>* 2015 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 2016 %load.a = load <4 x i32>* %ptr.a, align 2 2017 %load.b = load <4 x i32>* %ptr.b, align 2 2018 %cmp = icmp sge <4 x i32> %load.a, %load.b 2019 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a 2020 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 2021 %index.next = add i64 %index, 4 2022 %loop = icmp eq i64 %index.next, 16384 2023 br i1 %loop, label %for.end, label %vector.body 2024 2025 for.end: ; preds = %vector.body 2026 ret void 2027 2028 ; SSE4: test68: 2029 ; SSE4: pminsd 2030 2031 ; AVX1: test68: 2032 ; AVX1: vpminsd 2033 2034 ; AVX2: test68: 2035 ; AVX2: vpminsd 2036 } 2037 2038 define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind { 2039 vector.ph: 2040 br label %vector.body 2041 2042 vector.body: ; preds = %vector.body, %vector.ph 2043 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2044 %gep.a = getelementptr inbounds i32* %a, i64 %index 2045 %gep.b = getelementptr inbounds i32* %b, i64 %index 2046 %ptr.a = bitcast i32* %gep.a to <4 x i32>* 2047 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 2048 %load.a = load <4 x i32>* %ptr.a, align 2 
2049 %load.b = load <4 x i32>* %ptr.b, align 2 2050 %cmp = icmp ult <4 x i32> %load.a, %load.b 2051 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a 2052 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 2053 %index.next = add i64 %index, 4 2054 %loop = icmp eq i64 %index.next, 16384 2055 br i1 %loop, label %for.end, label %vector.body 2056 2057 for.end: ; preds = %vector.body 2058 ret void 2059 2060 ; SSE4: test69: 2061 ; SSE4: pmaxud 2062 2063 ; AVX1: test69: 2064 ; AVX1: vpmaxud 2065 2066 ; AVX2: test69: 2067 ; AVX2: vpmaxud 2068 } 2069 2070 define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind { 2071 vector.ph: 2072 br label %vector.body 2073 2074 vector.body: ; preds = %vector.body, %vector.ph 2075 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2076 %gep.a = getelementptr inbounds i32* %a, i64 %index 2077 %gep.b = getelementptr inbounds i32* %b, i64 %index 2078 %ptr.a = bitcast i32* %gep.a to <4 x i32>* 2079 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 2080 %load.a = load <4 x i32>* %ptr.a, align 2 2081 %load.b = load <4 x i32>* %ptr.b, align 2 2082 %cmp = icmp ule <4 x i32> %load.a, %load.b 2083 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a 2084 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 2085 %index.next = add i64 %index, 4 2086 %loop = icmp eq i64 %index.next, 16384 2087 br i1 %loop, label %for.end, label %vector.body 2088 2089 for.end: ; preds = %vector.body 2090 ret void 2091 2092 ; SSE4: test70: 2093 ; SSE4: pmaxud 2094 2095 ; AVX1: test70: 2096 ; AVX1: vpmaxud 2097 2098 ; AVX2: test70: 2099 ; AVX2: vpmaxud 2100 } 2101 2102 define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind { 2103 vector.ph: 2104 br label %vector.body 2105 2106 vector.body: ; preds = %vector.body, %vector.ph 2107 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2108 %gep.a = getelementptr inbounds i32* %a, i64 %index 2109 %gep.b = getelementptr inbounds i32* %b, i64 %index 2110 %ptr.a 
= bitcast i32* %gep.a to <4 x i32>* 2111 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 2112 %load.a = load <4 x i32>* %ptr.a, align 2 2113 %load.b = load <4 x i32>* %ptr.b, align 2 2114 %cmp = icmp ugt <4 x i32> %load.a, %load.b 2115 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a 2116 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 2117 %index.next = add i64 %index, 4 2118 %loop = icmp eq i64 %index.next, 16384 2119 br i1 %loop, label %for.end, label %vector.body 2120 2121 for.end: ; preds = %vector.body 2122 ret void 2123 2124 ; SSE4: test71: 2125 ; SSE4: pminud 2126 2127 ; AVX1: test71: 2128 ; AVX1: vpminud 2129 2130 ; AVX2: test71: 2131 ; AVX2: vpminud 2132 } 2133 2134 define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind { 2135 vector.ph: 2136 br label %vector.body 2137 2138 vector.body: ; preds = %vector.body, %vector.ph 2139 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2140 %gep.a = getelementptr inbounds i32* %a, i64 %index 2141 %gep.b = getelementptr inbounds i32* %b, i64 %index 2142 %ptr.a = bitcast i32* %gep.a to <4 x i32>* 2143 %ptr.b = bitcast i32* %gep.b to <4 x i32>* 2144 %load.a = load <4 x i32>* %ptr.a, align 2 2145 %load.b = load <4 x i32>* %ptr.b, align 2 2146 %cmp = icmp uge <4 x i32> %load.a, %load.b 2147 %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a 2148 store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2 2149 %index.next = add i64 %index, 4 2150 %loop = icmp eq i64 %index.next, 16384 2151 br i1 %loop, label %for.end, label %vector.body 2152 2153 for.end: ; preds = %vector.body 2154 ret void 2155 2156 ; SSE4: test72: 2157 ; SSE4: pminud 2158 2159 ; AVX1: test72: 2160 ; AVX1: vpminud 2161 2162 ; AVX2: test72: 2163 ; AVX2: vpminud 2164 } 2165 2166 define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind { 2167 vector.ph: 2168 br label %vector.body 2169 2170 vector.body: ; preds = %vector.body, %vector.ph 2171 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body 
] 2172 %gep.a = getelementptr inbounds i8* %a, i64 %index 2173 %gep.b = getelementptr inbounds i8* %b, i64 %index 2174 %ptr.a = bitcast i8* %gep.a to <32 x i8>* 2175 %ptr.b = bitcast i8* %gep.b to <32 x i8>* 2176 %load.a = load <32 x i8>* %ptr.a, align 2 2177 %load.b = load <32 x i8>* %ptr.b, align 2 2178 %cmp = icmp slt <32 x i8> %load.a, %load.b 2179 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2180 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2181 %index.next = add i64 %index, 32 2182 %loop = icmp eq i64 %index.next, 16384 2183 br i1 %loop, label %for.end, label %vector.body 2184 2185 for.end: ; preds = %vector.body 2186 ret void 2187 2188 ; AVX2: test73: 2189 ; AVX2: vpmaxsb 2190 } 2191 2192 define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind { 2193 vector.ph: 2194 br label %vector.body 2195 2196 vector.body: ; preds = %vector.body, %vector.ph 2197 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2198 %gep.a = getelementptr inbounds i8* %a, i64 %index 2199 %gep.b = getelementptr inbounds i8* %b, i64 %index 2200 %ptr.a = bitcast i8* %gep.a to <32 x i8>* 2201 %ptr.b = bitcast i8* %gep.b to <32 x i8>* 2202 %load.a = load <32 x i8>* %ptr.a, align 2 2203 %load.b = load <32 x i8>* %ptr.b, align 2 2204 %cmp = icmp sle <32 x i8> %load.a, %load.b 2205 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2206 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2207 %index.next = add i64 %index, 32 2208 %loop = icmp eq i64 %index.next, 16384 2209 br i1 %loop, label %for.end, label %vector.body 2210 2211 for.end: ; preds = %vector.body 2212 ret void 2213 2214 ; AVX2: test74: 2215 ; AVX2: vpmaxsb 2216 } 2217 2218 define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind { 2219 vector.ph: 2220 br label %vector.body 2221 2222 vector.body: ; preds = %vector.body, %vector.ph 2223 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2224 %gep.a = getelementptr inbounds i8* %a, i64 %index 2225 
%gep.b = getelementptr inbounds i8* %b, i64 %index 2226 %ptr.a = bitcast i8* %gep.a to <32 x i8>* 2227 %ptr.b = bitcast i8* %gep.b to <32 x i8>* 2228 %load.a = load <32 x i8>* %ptr.a, align 2 2229 %load.b = load <32 x i8>* %ptr.b, align 2 2230 %cmp = icmp sgt <32 x i8> %load.a, %load.b 2231 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2232 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2233 %index.next = add i64 %index, 32 2234 %loop = icmp eq i64 %index.next, 16384 2235 br i1 %loop, label %for.end, label %vector.body 2236 2237 for.end: ; preds = %vector.body 2238 ret void 2239 2240 ; AVX2: test75: 2241 ; AVX2: vpminsb 2242 } 2243 2244 define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind { 2245 vector.ph: 2246 br label %vector.body 2247 2248 vector.body: ; preds = %vector.body, %vector.ph 2249 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2250 %gep.a = getelementptr inbounds i8* %a, i64 %index 2251 %gep.b = getelementptr inbounds i8* %b, i64 %index 2252 %ptr.a = bitcast i8* %gep.a to <32 x i8>* 2253 %ptr.b = bitcast i8* %gep.b to <32 x i8>* 2254 %load.a = load <32 x i8>* %ptr.a, align 2 2255 %load.b = load <32 x i8>* %ptr.b, align 2 2256 %cmp = icmp sge <32 x i8> %load.a, %load.b 2257 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2258 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2259 %index.next = add i64 %index, 32 2260 %loop = icmp eq i64 %index.next, 16384 2261 br i1 %loop, label %for.end, label %vector.body 2262 2263 for.end: ; preds = %vector.body 2264 ret void 2265 2266 ; AVX2: test76: 2267 ; AVX2: vpminsb 2268 } 2269 2270 define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind { 2271 vector.ph: 2272 br label %vector.body 2273 2274 vector.body: ; preds = %vector.body, %vector.ph 2275 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2276 %gep.a = getelementptr inbounds i8* %a, i64 %index 2277 %gep.b = getelementptr inbounds i8* %b, i64 %index 2278 %ptr.a 
= bitcast i8* %gep.a to <32 x i8>* 2279 %ptr.b = bitcast i8* %gep.b to <32 x i8>* 2280 %load.a = load <32 x i8>* %ptr.a, align 2 2281 %load.b = load <32 x i8>* %ptr.b, align 2 2282 %cmp = icmp ult <32 x i8> %load.a, %load.b 2283 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2284 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2285 %index.next = add i64 %index, 32 2286 %loop = icmp eq i64 %index.next, 16384 2287 br i1 %loop, label %for.end, label %vector.body 2288 2289 for.end: ; preds = %vector.body 2290 ret void 2291 2292 ; AVX2: test77: 2293 ; AVX2: vpmaxub 2294 } 2295 2296 define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind { 2297 vector.ph: 2298 br label %vector.body 2299 2300 vector.body: ; preds = %vector.body, %vector.ph 2301 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2302 %gep.a = getelementptr inbounds i8* %a, i64 %index 2303 %gep.b = getelementptr inbounds i8* %b, i64 %index 2304 %ptr.a = bitcast i8* %gep.a to <32 x i8>* 2305 %ptr.b = bitcast i8* %gep.b to <32 x i8>* 2306 %load.a = load <32 x i8>* %ptr.a, align 2 2307 %load.b = load <32 x i8>* %ptr.b, align 2 2308 %cmp = icmp ule <32 x i8> %load.a, %load.b 2309 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2310 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2311 %index.next = add i64 %index, 32 2312 %loop = icmp eq i64 %index.next, 16384 2313 br i1 %loop, label %for.end, label %vector.body 2314 2315 for.end: ; preds = %vector.body 2316 ret void 2317 2318 ; AVX2: test78: 2319 ; AVX2: vpmaxub 2320 } 2321 2322 define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind { 2323 vector.ph: 2324 br label %vector.body 2325 2326 vector.body: ; preds = %vector.body, %vector.ph 2327 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2328 %gep.a = getelementptr inbounds i8* %a, i64 %index 2329 %gep.b = getelementptr inbounds i8* %b, i64 %index 2330 %ptr.a = bitcast i8* %gep.a to <32 x i8>* 2331 %ptr.b = bitcast i8* 
%gep.b to <32 x i8>* 2332 %load.a = load <32 x i8>* %ptr.a, align 2 2333 %load.b = load <32 x i8>* %ptr.b, align 2 2334 %cmp = icmp ugt <32 x i8> %load.a, %load.b 2335 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2336 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2337 %index.next = add i64 %index, 32 2338 %loop = icmp eq i64 %index.next, 16384 2339 br i1 %loop, label %for.end, label %vector.body 2340 2341 for.end: ; preds = %vector.body 2342 ret void 2343 2344 ; AVX2: test79: 2345 ; AVX2: vpminub 2346 } 2347 2348 define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind { 2349 vector.ph: 2350 br label %vector.body 2351 2352 vector.body: ; preds = %vector.body, %vector.ph 2353 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2354 %gep.a = getelementptr inbounds i8* %a, i64 %index 2355 %gep.b = getelementptr inbounds i8* %b, i64 %index 2356 %ptr.a = bitcast i8* %gep.a to <32 x i8>* 2357 %ptr.b = bitcast i8* %gep.b to <32 x i8>* 2358 %load.a = load <32 x i8>* %ptr.a, align 2 2359 %load.b = load <32 x i8>* %ptr.b, align 2 2360 %cmp = icmp uge <32 x i8> %load.a, %load.b 2361 %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a 2362 store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 2363 %index.next = add i64 %index, 32 2364 %loop = icmp eq i64 %index.next, 16384 2365 br i1 %loop, label %for.end, label %vector.body 2366 2367 for.end: ; preds = %vector.body 2368 ret void 2369 2370 ; AVX2: test80: 2371 ; AVX2: vpminub 2372 } 2373 2374 define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind { 2375 vector.ph: 2376 br label %vector.body 2377 2378 vector.body: ; preds = %vector.body, %vector.ph 2379 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2380 %gep.a = getelementptr inbounds i16* %a, i64 %index 2381 %gep.b = getelementptr inbounds i16* %b, i64 %index 2382 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2383 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2384 %load.a = load <16 x i16>* 
%ptr.a, align 2 2385 %load.b = load <16 x i16>* %ptr.b, align 2 2386 %cmp = icmp slt <16 x i16> %load.a, %load.b 2387 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2388 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2389 %index.next = add i64 %index, 16 2390 %loop = icmp eq i64 %index.next, 16384 2391 br i1 %loop, label %for.end, label %vector.body 2392 2393 for.end: ; preds = %vector.body 2394 ret void 2395 2396 ; AVX2: test81: 2397 ; AVX2: vpmaxsw 2398 } 2399 2400 define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind { 2401 vector.ph: 2402 br label %vector.body 2403 2404 vector.body: ; preds = %vector.body, %vector.ph 2405 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2406 %gep.a = getelementptr inbounds i16* %a, i64 %index 2407 %gep.b = getelementptr inbounds i16* %b, i64 %index 2408 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2409 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2410 %load.a = load <16 x i16>* %ptr.a, align 2 2411 %load.b = load <16 x i16>* %ptr.b, align 2 2412 %cmp = icmp sle <16 x i16> %load.a, %load.b 2413 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2414 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2415 %index.next = add i64 %index, 16 2416 %loop = icmp eq i64 %index.next, 16384 2417 br i1 %loop, label %for.end, label %vector.body 2418 2419 for.end: ; preds = %vector.body 2420 ret void 2421 2422 ; AVX2: test82: 2423 ; AVX2: vpmaxsw 2424 } 2425 2426 define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind { 2427 vector.ph: 2428 br label %vector.body 2429 2430 vector.body: ; preds = %vector.body, %vector.ph 2431 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2432 %gep.a = getelementptr inbounds i16* %a, i64 %index 2433 %gep.b = getelementptr inbounds i16* %b, i64 %index 2434 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2435 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2436 %load.a = load <16 x i16>* %ptr.a, align 2 2437 %load.b = 
load <16 x i16>* %ptr.b, align 2 2438 %cmp = icmp sgt <16 x i16> %load.a, %load.b 2439 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2440 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2441 %index.next = add i64 %index, 16 2442 %loop = icmp eq i64 %index.next, 16384 2443 br i1 %loop, label %for.end, label %vector.body 2444 2445 for.end: ; preds = %vector.body 2446 ret void 2447 2448 ; AVX2: test83: 2449 ; AVX2: vpminsw 2450 } 2451 2452 define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind { 2453 vector.ph: 2454 br label %vector.body 2455 2456 vector.body: ; preds = %vector.body, %vector.ph 2457 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2458 %gep.a = getelementptr inbounds i16* %a, i64 %index 2459 %gep.b = getelementptr inbounds i16* %b, i64 %index 2460 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2461 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2462 %load.a = load <16 x i16>* %ptr.a, align 2 2463 %load.b = load <16 x i16>* %ptr.b, align 2 2464 %cmp = icmp sge <16 x i16> %load.a, %load.b 2465 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2466 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2467 %index.next = add i64 %index, 16 2468 %loop = icmp eq i64 %index.next, 16384 2469 br i1 %loop, label %for.end, label %vector.body 2470 2471 for.end: ; preds = %vector.body 2472 ret void 2473 2474 ; AVX2: test84: 2475 ; AVX2: vpminsw 2476 } 2477 2478 define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind { 2479 vector.ph: 2480 br label %vector.body 2481 2482 vector.body: ; preds = %vector.body, %vector.ph 2483 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2484 %gep.a = getelementptr inbounds i16* %a, i64 %index 2485 %gep.b = getelementptr inbounds i16* %b, i64 %index 2486 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2487 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2488 %load.a = load <16 x i16>* %ptr.a, align 2 2489 %load.b = load <16 x i16>* %ptr.b, align 2 
2490 %cmp = icmp ult <16 x i16> %load.a, %load.b 2491 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2492 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2493 %index.next = add i64 %index, 16 2494 %loop = icmp eq i64 %index.next, 16384 2495 br i1 %loop, label %for.end, label %vector.body 2496 2497 for.end: ; preds = %vector.body 2498 ret void 2499 2500 ; AVX2: test85: 2501 ; AVX2: vpmaxuw 2502 } 2503 2504 define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind { 2505 vector.ph: 2506 br label %vector.body 2507 2508 vector.body: ; preds = %vector.body, %vector.ph 2509 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2510 %gep.a = getelementptr inbounds i16* %a, i64 %index 2511 %gep.b = getelementptr inbounds i16* %b, i64 %index 2512 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2513 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2514 %load.a = load <16 x i16>* %ptr.a, align 2 2515 %load.b = load <16 x i16>* %ptr.b, align 2 2516 %cmp = icmp ule <16 x i16> %load.a, %load.b 2517 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2518 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2519 %index.next = add i64 %index, 16 2520 %loop = icmp eq i64 %index.next, 16384 2521 br i1 %loop, label %for.end, label %vector.body 2522 2523 for.end: ; preds = %vector.body 2524 ret void 2525 2526 ; AVX2: test86: 2527 ; AVX2: vpmaxuw 2528 } 2529 2530 define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind { 2531 vector.ph: 2532 br label %vector.body 2533 2534 vector.body: ; preds = %vector.body, %vector.ph 2535 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2536 %gep.a = getelementptr inbounds i16* %a, i64 %index 2537 %gep.b = getelementptr inbounds i16* %b, i64 %index 2538 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2539 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2540 %load.a = load <16 x i16>* %ptr.a, align 2 2541 %load.b = load <16 x i16>* %ptr.b, align 2 2542 %cmp = icmp ugt <16 x i16> 
%load.a, %load.b 2543 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2544 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2545 %index.next = add i64 %index, 16 2546 %loop = icmp eq i64 %index.next, 16384 2547 br i1 %loop, label %for.end, label %vector.body 2548 2549 for.end: ; preds = %vector.body 2550 ret void 2551 2552 ; AVX2: test87: 2553 ; AVX2: vpminuw 2554 } 2555 2556 define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind { 2557 vector.ph: 2558 br label %vector.body 2559 2560 vector.body: ; preds = %vector.body, %vector.ph 2561 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2562 %gep.a = getelementptr inbounds i16* %a, i64 %index 2563 %gep.b = getelementptr inbounds i16* %b, i64 %index 2564 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 2565 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 2566 %load.a = load <16 x i16>* %ptr.a, align 2 2567 %load.b = load <16 x i16>* %ptr.b, align 2 2568 %cmp = icmp uge <16 x i16> %load.a, %load.b 2569 %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a 2570 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 2571 %index.next = add i64 %index, 16 2572 %loop = icmp eq i64 %index.next, 16384 2573 br i1 %loop, label %for.end, label %vector.body 2574 2575 for.end: ; preds = %vector.body 2576 ret void 2577 2578 ; AVX2: test88: 2579 ; AVX2: vpminuw 2580 } 2581 2582 define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind { 2583 vector.ph: 2584 br label %vector.body 2585 2586 vector.body: ; preds = %vector.body, %vector.ph 2587 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2588 %gep.a = getelementptr inbounds i32* %a, i64 %index 2589 %gep.b = getelementptr inbounds i32* %b, i64 %index 2590 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2591 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2592 %load.a = load <8 x i32>* %ptr.a, align 2 2593 %load.b = load <8 x i32>* %ptr.b, align 2 2594 %cmp = icmp slt <8 x i32> %load.a, %load.b 2595 %sel = select <8 
x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2596 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2597 %index.next = add i64 %index, 8 2598 %loop = icmp eq i64 %index.next, 16384 2599 br i1 %loop, label %for.end, label %vector.body 2600 2601 for.end: ; preds = %vector.body 2602 ret void 2603 2604 ; AVX2: test89: 2605 ; AVX2: vpmaxsd 2606 } 2607 2608 define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind { 2609 vector.ph: 2610 br label %vector.body 2611 2612 vector.body: ; preds = %vector.body, %vector.ph 2613 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2614 %gep.a = getelementptr inbounds i32* %a, i64 %index 2615 %gep.b = getelementptr inbounds i32* %b, i64 %index 2616 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2617 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2618 %load.a = load <8 x i32>* %ptr.a, align 2 2619 %load.b = load <8 x i32>* %ptr.b, align 2 2620 %cmp = icmp sle <8 x i32> %load.a, %load.b 2621 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2622 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2623 %index.next = add i64 %index, 8 2624 %loop = icmp eq i64 %index.next, 16384 2625 br i1 %loop, label %for.end, label %vector.body 2626 2627 for.end: ; preds = %vector.body 2628 ret void 2629 2630 ; AVX2: test90: 2631 ; AVX2: vpmaxsd 2632 } 2633 2634 define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind { 2635 vector.ph: 2636 br label %vector.body 2637 2638 vector.body: ; preds = %vector.body, %vector.ph 2639 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2640 %gep.a = getelementptr inbounds i32* %a, i64 %index 2641 %gep.b = getelementptr inbounds i32* %b, i64 %index 2642 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2643 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2644 %load.a = load <8 x i32>* %ptr.a, align 2 2645 %load.b = load <8 x i32>* %ptr.b, align 2 2646 %cmp = icmp sgt <8 x i32> %load.a, %load.b 2647 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2648 
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2649 %index.next = add i64 %index, 8 2650 %loop = icmp eq i64 %index.next, 16384 2651 br i1 %loop, label %for.end, label %vector.body 2652 2653 for.end: ; preds = %vector.body 2654 ret void 2655 2656 ; AVX2: test91: 2657 ; AVX2: vpminsd 2658 } 2659 2660 define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind { 2661 vector.ph: 2662 br label %vector.body 2663 2664 vector.body: ; preds = %vector.body, %vector.ph 2665 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2666 %gep.a = getelementptr inbounds i32* %a, i64 %index 2667 %gep.b = getelementptr inbounds i32* %b, i64 %index 2668 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2669 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2670 %load.a = load <8 x i32>* %ptr.a, align 2 2671 %load.b = load <8 x i32>* %ptr.b, align 2 2672 %cmp = icmp sge <8 x i32> %load.a, %load.b 2673 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2674 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2675 %index.next = add i64 %index, 8 2676 %loop = icmp eq i64 %index.next, 16384 2677 br i1 %loop, label %for.end, label %vector.body 2678 2679 for.end: ; preds = %vector.body 2680 ret void 2681 2682 ; AVX2: test92: 2683 ; AVX2: vpminsd 2684 } 2685 2686 define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind { 2687 vector.ph: 2688 br label %vector.body 2689 2690 vector.body: ; preds = %vector.body, %vector.ph 2691 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2692 %gep.a = getelementptr inbounds i32* %a, i64 %index 2693 %gep.b = getelementptr inbounds i32* %b, i64 %index 2694 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2695 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2696 %load.a = load <8 x i32>* %ptr.a, align 2 2697 %load.b = load <8 x i32>* %ptr.b, align 2 2698 %cmp = icmp ult <8 x i32> %load.a, %load.b 2699 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2700 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2701 
%index.next = add i64 %index, 8 2702 %loop = icmp eq i64 %index.next, 16384 2703 br i1 %loop, label %for.end, label %vector.body 2704 2705 for.end: ; preds = %vector.body 2706 ret void 2707 2708 ; AVX2: test93: 2709 ; AVX2: vpmaxud 2710 } 2711 2712 define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind { 2713 vector.ph: 2714 br label %vector.body 2715 2716 vector.body: ; preds = %vector.body, %vector.ph 2717 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2718 %gep.a = getelementptr inbounds i32* %a, i64 %index 2719 %gep.b = getelementptr inbounds i32* %b, i64 %index 2720 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2721 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2722 %load.a = load <8 x i32>* %ptr.a, align 2 2723 %load.b = load <8 x i32>* %ptr.b, align 2 2724 %cmp = icmp ule <8 x i32> %load.a, %load.b 2725 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2726 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2727 %index.next = add i64 %index, 8 2728 %loop = icmp eq i64 %index.next, 16384 2729 br i1 %loop, label %for.end, label %vector.body 2730 2731 for.end: ; preds = %vector.body 2732 ret void 2733 2734 ; AVX2: test94: 2735 ; AVX2: vpmaxud 2736 } 2737 2738 define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind { 2739 vector.ph: 2740 br label %vector.body 2741 2742 vector.body: ; preds = %vector.body, %vector.ph 2743 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2744 %gep.a = getelementptr inbounds i32* %a, i64 %index 2745 %gep.b = getelementptr inbounds i32* %b, i64 %index 2746 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2747 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2748 %load.a = load <8 x i32>* %ptr.a, align 2 2749 %load.b = load <8 x i32>* %ptr.b, align 2 2750 %cmp = icmp ugt <8 x i32> %load.a, %load.b 2751 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2752 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2753 %index.next = add i64 %index, 8 2754 %loop = icmp eq 
i64 %index.next, 16384 2755 br i1 %loop, label %for.end, label %vector.body 2756 2757 for.end: ; preds = %vector.body 2758 ret void 2759 2760 ; AVX2: test95: 2761 ; AVX2: vpminud 2762 } 2763 2764 define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind { 2765 vector.ph: 2766 br label %vector.body 2767 2768 vector.body: ; preds = %vector.body, %vector.ph 2769 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 2770 %gep.a = getelementptr inbounds i32* %a, i64 %index 2771 %gep.b = getelementptr inbounds i32* %b, i64 %index 2772 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 2773 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 2774 %load.a = load <8 x i32>* %ptr.a, align 2 2775 %load.b = load <8 x i32>* %ptr.b, align 2 2776 %cmp = icmp uge <8 x i32> %load.a, %load.b 2777 %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a 2778 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 2779 %index.next = add i64 %index, 8 2780 %loop = icmp eq i64 %index.next, 16384 2781 br i1 %loop, label %for.end, label %vector.body 2782 2783 for.end: ; preds = %vector.body 2784 ret void 2785 2786 ; AVX2: test96: 2787 ; AVX2: vpminud 2788 } 2789