Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
      2 ; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
      3 ; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
      4 ; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
      5 
      ; Signed i8 minimum written as icmp slt + select on <16 x i8> vectors loaded from %a and %b (align 2).
      ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pminsb under SSE4 and vpminsb under AVX1 and AVX2.
      6 define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
      7 vector.ph:
      8   br label %vector.body
      9 
     10 vector.body:                                      ; preds = %vector.body, %vector.ph
     11   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
     12   %gep.a = getelementptr inbounds i8* %a, i64 %index
     13   %gep.b = getelementptr inbounds i8* %b, i64 %index
     14   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
     15   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
     16   %load.a = load <16 x i8>* %ptr.a, align 2
     17   %load.b = load <16 x i8>* %ptr.b, align 2
     18   %cmp = icmp slt <16 x i8> %load.a, %load.b  ; per-lane signed a < b
     19   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (minimum)
     20   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
     21   %index.next = add i64 %index, 16
     22   %loop = icmp eq i64 %index.next, 16384
     23   br i1 %loop, label %for.end, label %vector.body
     24 
     25 for.end:                                          ; preds = %vector.body
     26   ret void
     27 
     28 ; SSE4: test1:
     29 ; SSE4: pminsb
     30 
     31 ; AVX1: test1:
     32 ; AVX1: vpminsb
     33 
     34 ; AVX2: test1:
     35 ; AVX2: vpminsb
     36 }
     37 
     ; Signed i8 minimum written as icmp sle + select on <16 x i8> vectors loaded from %a and %b (align 2).
     ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pminsb under SSE4 and vpminsb under AVX1 and AVX2.
     38 define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
     39 vector.ph:
     40   br label %vector.body
     41 
     42 vector.body:                                      ; preds = %vector.body, %vector.ph
     43   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
     44   %gep.a = getelementptr inbounds i8* %a, i64 %index
     45   %gep.b = getelementptr inbounds i8* %b, i64 %index
     46   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
     47   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
     48   %load.a = load <16 x i8>* %ptr.a, align 2
     49   %load.b = load <16 x i8>* %ptr.b, align 2
     50   %cmp = icmp sle <16 x i8> %load.a, %load.b  ; per-lane signed a <= b
     51   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (minimum)
     52   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
     53   %index.next = add i64 %index, 16
     54   %loop = icmp eq i64 %index.next, 16384
     55   br i1 %loop, label %for.end, label %vector.body
     56 
     57 for.end:                                          ; preds = %vector.body
     58   ret void
     59 
     60 ; SSE4: test2:
     61 ; SSE4: pminsb
     62 
     63 ; AVX1: test2:
     64 ; AVX1: vpminsb
     65 
     66 ; AVX2: test2:
     67 ; AVX2: vpminsb
     68 }
     69 
     ; Signed i8 maximum written as icmp sgt + select on <16 x i8> vectors loaded from %a and %b (align 2).
     ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pmaxsb under SSE4 and vpmaxsb under AVX1 and AVX2.
     70 define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
     71 vector.ph:
     72   br label %vector.body
     73 
     74 vector.body:                                      ; preds = %vector.body, %vector.ph
     75   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
     76   %gep.a = getelementptr inbounds i8* %a, i64 %index
     77   %gep.b = getelementptr inbounds i8* %b, i64 %index
     78   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
     79   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
     80   %load.a = load <16 x i8>* %ptr.a, align 2
     81   %load.b = load <16 x i8>* %ptr.b, align 2
     82   %cmp = icmp sgt <16 x i8> %load.a, %load.b  ; per-lane signed a > b
     83   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (maximum)
     84   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
     85   %index.next = add i64 %index, 16
     86   %loop = icmp eq i64 %index.next, 16384
     87   br i1 %loop, label %for.end, label %vector.body
     88 
     89 for.end:                                          ; preds = %vector.body
     90   ret void
     91 
     92 ; SSE4: test3:
     93 ; SSE4: pmaxsb
     94 
     95 ; AVX1: test3:
     96 ; AVX1: vpmaxsb
     97 
     98 ; AVX2: test3:
     99 ; AVX2: vpmaxsb
    100 }
    101 
    ; Signed i8 maximum written as icmp sge + select on <16 x i8> vectors loaded from %a and %b (align 2).
    ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pmaxsb under SSE4 and vpmaxsb under AVX1 and AVX2.
    102 define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
    103 vector.ph:
    104   br label %vector.body
    105 
    106 vector.body:                                      ; preds = %vector.body, %vector.ph
    107   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    108   %gep.a = getelementptr inbounds i8* %a, i64 %index
    109   %gep.b = getelementptr inbounds i8* %b, i64 %index
    110   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
    111   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
    112   %load.a = load <16 x i8>* %ptr.a, align 2
    113   %load.b = load <16 x i8>* %ptr.b, align 2
    114   %cmp = icmp sge <16 x i8> %load.a, %load.b  ; per-lane signed a >= b
    115   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (maximum)
    116   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
    117   %index.next = add i64 %index, 16
    118   %loop = icmp eq i64 %index.next, 16384
    119   br i1 %loop, label %for.end, label %vector.body
    120 
    121 for.end:                                          ; preds = %vector.body
    122   ret void
    123 
    124 ; SSE4: test4:
    125 ; SSE4: pmaxsb
    126 
    127 ; AVX1: test4:
    128 ; AVX1: vpmaxsb
    129 
    130 ; AVX2: test4:
    131 ; AVX2: vpmaxsb
    132 }
    133 
    ; Unsigned i8 minimum written as icmp ult + select on <16 x i8> vectors loaded from %a and %b (align 2).
    ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pminub under SSE2 and vpminub under AVX1 and AVX2.
    134 define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
    135 vector.ph:
    136   br label %vector.body
    137 
    138 vector.body:                                      ; preds = %vector.body, %vector.ph
    139   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    140   %gep.a = getelementptr inbounds i8* %a, i64 %index
    141   %gep.b = getelementptr inbounds i8* %b, i64 %index
    142   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
    143   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
    144   %load.a = load <16 x i8>* %ptr.a, align 2
    145   %load.b = load <16 x i8>* %ptr.b, align 2
    146   %cmp = icmp ult <16 x i8> %load.a, %load.b  ; per-lane unsigned a < b
    147   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (minimum)
    148   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
    149   %index.next = add i64 %index, 16
    150   %loop = icmp eq i64 %index.next, 16384
    151   br i1 %loop, label %for.end, label %vector.body
    152 
    153 for.end:                                          ; preds = %vector.body
    154   ret void
    155 
    156 ; SSE2: test5:
    157 ; SSE2: pminub
    158 
    159 ; AVX1: test5:
    160 ; AVX1: vpminub
    161 
    162 ; AVX2: test5:
    163 ; AVX2: vpminub
    164 }
    165 
    ; Unsigned i8 minimum written as icmp ule + select on <16 x i8> vectors loaded from %a and %b (align 2).
    ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pminub under SSE2 and vpminub under AVX1 and AVX2.
    166 define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
    167 vector.ph:
    168   br label %vector.body
    169 
    170 vector.body:                                      ; preds = %vector.body, %vector.ph
    171   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    172   %gep.a = getelementptr inbounds i8* %a, i64 %index
    173   %gep.b = getelementptr inbounds i8* %b, i64 %index
    174   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
    175   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
    176   %load.a = load <16 x i8>* %ptr.a, align 2
    177   %load.b = load <16 x i8>* %ptr.b, align 2
    178   %cmp = icmp ule <16 x i8> %load.a, %load.b  ; per-lane unsigned a <= b
    179   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (minimum)
    180   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
    181   %index.next = add i64 %index, 16
    182   %loop = icmp eq i64 %index.next, 16384
    183   br i1 %loop, label %for.end, label %vector.body
    184 
    185 for.end:                                          ; preds = %vector.body
    186   ret void
    187 
    188 ; SSE2: test6:
    189 ; SSE2: pminub
    190 
    191 ; AVX1: test6:
    192 ; AVX1: vpminub
    193 
    194 ; AVX2: test6:
    195 ; AVX2: vpminub
    196 }
    197 
    ; Unsigned i8 maximum written as icmp ugt + select on <16 x i8> vectors loaded from %a and %b (align 2).
    ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pmaxub under SSE2 and vpmaxub under AVX1 and AVX2.
    198 define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
    199 vector.ph:
    200   br label %vector.body
    201 
    202 vector.body:                                      ; preds = %vector.body, %vector.ph
    203   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    204   %gep.a = getelementptr inbounds i8* %a, i64 %index
    205   %gep.b = getelementptr inbounds i8* %b, i64 %index
    206   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
    207   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
    208   %load.a = load <16 x i8>* %ptr.a, align 2
    209   %load.b = load <16 x i8>* %ptr.b, align 2
    210   %cmp = icmp ugt <16 x i8> %load.a, %load.b  ; per-lane unsigned a > b
    211   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (maximum)
    212   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
    213   %index.next = add i64 %index, 16
    214   %loop = icmp eq i64 %index.next, 16384
    215   br i1 %loop, label %for.end, label %vector.body
    216 
    217 for.end:                                          ; preds = %vector.body
    218   ret void
    219 
    220 ; SSE2: test7:
    221 ; SSE2: pmaxub
    222 
    223 ; AVX1: test7:
    224 ; AVX1: vpmaxub
    225 
    226 ; AVX2: test7:
    227 ; AVX2: vpmaxub
    228 }
    229 
    ; Unsigned i8 maximum written as icmp uge + select on <16 x i8> vectors loaded from %a and %b (align 2).
    ; Loops from index 0 to 16384 in steps of 16, storing the result over %a; checks expect pmaxub under SSE2 and vpmaxub under AVX1 and AVX2.
    230 define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
    231 vector.ph:
    232   br label %vector.body
    233 
    234 vector.body:                                      ; preds = %vector.body, %vector.ph
    235   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    236   %gep.a = getelementptr inbounds i8* %a, i64 %index
    237   %gep.b = getelementptr inbounds i8* %b, i64 %index
    238   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
    239   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
    240   %load.a = load <16 x i8>* %ptr.a, align 2
    241   %load.b = load <16 x i8>* %ptr.b, align 2
    242   %cmp = icmp uge <16 x i8> %load.a, %load.b  ; per-lane unsigned a >= b
    243   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b  ; take a where the compare holds, else b (maximum)
    244   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
    245   %index.next = add i64 %index, 16
    246   %loop = icmp eq i64 %index.next, 16384
    247   br i1 %loop, label %for.end, label %vector.body
    248 
    249 for.end:                                          ; preds = %vector.body
    250   ret void
    251 
    252 ; SSE2: test8:
    253 ; SSE2: pmaxub
    254 
    255 ; AVX1: test8:
    256 ; AVX1: vpmaxub
    257 
    258 ; AVX2: test8:
    259 ; AVX2: vpmaxub
    260 }
    261 
    ; Signed i16 minimum written as icmp slt + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pminsw under SSE2 and vpminsw under AVX1 and AVX2.
    262 define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
    263 vector.ph:
    264   br label %vector.body
    265 
    266 vector.body:                                      ; preds = %vector.body, %vector.ph
    267   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    268   %gep.a = getelementptr inbounds i16* %a, i64 %index
    269   %gep.b = getelementptr inbounds i16* %b, i64 %index
    270   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    271   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    272   %load.a = load <8 x i16>* %ptr.a, align 2
    273   %load.b = load <8 x i16>* %ptr.b, align 2
    274   %cmp = icmp slt <8 x i16> %load.a, %load.b  ; per-lane signed a < b
    275   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (minimum)
    276   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    277   %index.next = add i64 %index, 8
    278   %loop = icmp eq i64 %index.next, 16384
    279   br i1 %loop, label %for.end, label %vector.body
    280 
    281 for.end:                                          ; preds = %vector.body
    282   ret void
    283 
    284 ; SSE2: test9:
    285 ; SSE2: pminsw
    286 
    287 ; AVX1: test9:
    288 ; AVX1: vpminsw
    289 
    290 ; AVX2: test9:
    291 ; AVX2: vpminsw
    292 }
    293 
    ; Signed i16 minimum written as icmp sle + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pminsw under SSE2 and vpminsw under AVX1 and AVX2.
    294 define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
    295 vector.ph:
    296   br label %vector.body
    297 
    298 vector.body:                                      ; preds = %vector.body, %vector.ph
    299   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    300   %gep.a = getelementptr inbounds i16* %a, i64 %index
    301   %gep.b = getelementptr inbounds i16* %b, i64 %index
    302   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    303   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    304   %load.a = load <8 x i16>* %ptr.a, align 2
    305   %load.b = load <8 x i16>* %ptr.b, align 2
    306   %cmp = icmp sle <8 x i16> %load.a, %load.b  ; per-lane signed a <= b
    307   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (minimum)
    308   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    309   %index.next = add i64 %index, 8
    310   %loop = icmp eq i64 %index.next, 16384
    311   br i1 %loop, label %for.end, label %vector.body
    312 
    313 for.end:                                          ; preds = %vector.body
    314   ret void
    315 
    316 ; SSE2: test10:
    317 ; SSE2: pminsw
    318 
    319 ; AVX1: test10:
    320 ; AVX1: vpminsw
    321 
    322 ; AVX2: test10:
    323 ; AVX2: vpminsw
    324 }
    325 
    ; Signed i16 maximum written as icmp sgt + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pmaxsw under SSE2 and vpmaxsw under AVX1 and AVX2.
    326 define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
    327 vector.ph:
    328   br label %vector.body
    329 
    330 vector.body:                                      ; preds = %vector.body, %vector.ph
    331   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    332   %gep.a = getelementptr inbounds i16* %a, i64 %index
    333   %gep.b = getelementptr inbounds i16* %b, i64 %index
    334   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    335   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    336   %load.a = load <8 x i16>* %ptr.a, align 2
    337   %load.b = load <8 x i16>* %ptr.b, align 2
    338   %cmp = icmp sgt <8 x i16> %load.a, %load.b  ; per-lane signed a > b
    339   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (maximum)
    340   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    341   %index.next = add i64 %index, 8
    342   %loop = icmp eq i64 %index.next, 16384
    343   br i1 %loop, label %for.end, label %vector.body
    344 
    345 for.end:                                          ; preds = %vector.body
    346   ret void
    347 
    348 ; SSE2: test11:
    349 ; SSE2: pmaxsw
    350 
    351 ; AVX1: test11:
    352 ; AVX1: vpmaxsw
    353 
    354 ; AVX2: test11:
    355 ; AVX2: vpmaxsw
    356 }
    357 
    ; Signed i16 maximum written as icmp sge + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pmaxsw under SSE2 and vpmaxsw under AVX1 and AVX2.
    358 define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
    359 vector.ph:
    360   br label %vector.body
    361 
    362 vector.body:                                      ; preds = %vector.body, %vector.ph
    363   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    364   %gep.a = getelementptr inbounds i16* %a, i64 %index
    365   %gep.b = getelementptr inbounds i16* %b, i64 %index
    366   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    367   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    368   %load.a = load <8 x i16>* %ptr.a, align 2
    369   %load.b = load <8 x i16>* %ptr.b, align 2
    370   %cmp = icmp sge <8 x i16> %load.a, %load.b  ; per-lane signed a >= b
    371   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (maximum)
    372   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    373   %index.next = add i64 %index, 8
    374   %loop = icmp eq i64 %index.next, 16384
    375   br i1 %loop, label %for.end, label %vector.body
    376 
    377 for.end:                                          ; preds = %vector.body
    378   ret void
    379 
    380 ; SSE2: test12:
    381 ; SSE2: pmaxsw
    382 
    383 ; AVX1: test12:
    384 ; AVX1: vpmaxsw
    385 
    386 ; AVX2: test12:
    387 ; AVX2: vpmaxsw
    388 }
    389 
    ; Unsigned i16 minimum written as icmp ult + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pminuw under SSE4 and vpminuw under AVX1 and AVX2.
    390 define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
    391 vector.ph:
    392   br label %vector.body
    393 
    394 vector.body:                                      ; preds = %vector.body, %vector.ph
    395   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    396   %gep.a = getelementptr inbounds i16* %a, i64 %index
    397   %gep.b = getelementptr inbounds i16* %b, i64 %index
    398   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    399   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    400   %load.a = load <8 x i16>* %ptr.a, align 2
    401   %load.b = load <8 x i16>* %ptr.b, align 2
    402   %cmp = icmp ult <8 x i16> %load.a, %load.b  ; per-lane unsigned a < b
    403   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (minimum)
    404   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    405   %index.next = add i64 %index, 8
    406   %loop = icmp eq i64 %index.next, 16384
    407   br i1 %loop, label %for.end, label %vector.body
    408 
    409 for.end:                                          ; preds = %vector.body
    410   ret void
    411 
    412 ; SSE4: test13:
    413 ; SSE4: pminuw
    414 
    415 ; AVX1: test13:
    416 ; AVX1: vpminuw
    417 
    418 ; AVX2: test13:
    419 ; AVX2: vpminuw
    420 }
    421 
    ; Unsigned i16 minimum written as icmp ule + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pminuw under SSE4 and vpminuw under AVX1 and AVX2.
    422 define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
    423 vector.ph:
    424   br label %vector.body
    425 
    426 vector.body:                                      ; preds = %vector.body, %vector.ph
    427   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    428   %gep.a = getelementptr inbounds i16* %a, i64 %index
    429   %gep.b = getelementptr inbounds i16* %b, i64 %index
    430   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    431   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    432   %load.a = load <8 x i16>* %ptr.a, align 2
    433   %load.b = load <8 x i16>* %ptr.b, align 2
    434   %cmp = icmp ule <8 x i16> %load.a, %load.b  ; per-lane unsigned a <= b
    435   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (minimum)
    436   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    437   %index.next = add i64 %index, 8
    438   %loop = icmp eq i64 %index.next, 16384
    439   br i1 %loop, label %for.end, label %vector.body
    440 
    441 for.end:                                          ; preds = %vector.body
    442   ret void
    443 
    444 ; SSE4: test14:
    445 ; SSE4: pminuw
    446 
    447 ; AVX1: test14:
    448 ; AVX1: vpminuw
    449 
    450 ; AVX2: test14:
    451 ; AVX2: vpminuw
    452 }
    453 
    ; Unsigned i16 maximum written as icmp ugt + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pmaxuw under SSE4 and vpmaxuw under AVX1 and AVX2.
    454 define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
    455 vector.ph:
    456   br label %vector.body
    457 
    458 vector.body:                                      ; preds = %vector.body, %vector.ph
    459   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    460   %gep.a = getelementptr inbounds i16* %a, i64 %index
    461   %gep.b = getelementptr inbounds i16* %b, i64 %index
    462   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    463   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    464   %load.a = load <8 x i16>* %ptr.a, align 2
    465   %load.b = load <8 x i16>* %ptr.b, align 2
    466   %cmp = icmp ugt <8 x i16> %load.a, %load.b  ; per-lane unsigned a > b
    467   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (maximum)
    468   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    469   %index.next = add i64 %index, 8
    470   %loop = icmp eq i64 %index.next, 16384
    471   br i1 %loop, label %for.end, label %vector.body
    472 
    473 for.end:                                          ; preds = %vector.body
    474   ret void
    475 
    476 ; SSE4: test15:
    477 ; SSE4: pmaxuw
    478 
    479 ; AVX1: test15:
    480 ; AVX1: vpmaxuw
    481 
    482 ; AVX2: test15:
    483 ; AVX2: vpmaxuw
    484 }
    485 
    ; Unsigned i16 maximum written as icmp uge + select on <8 x i16> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 8, storing the result over %a; checks expect pmaxuw under SSE4 and vpmaxuw under AVX1 and AVX2.
    486 define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
    487 vector.ph:
    488   br label %vector.body
    489 
    490 vector.body:                                      ; preds = %vector.body, %vector.ph
    491   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    492   %gep.a = getelementptr inbounds i16* %a, i64 %index
    493   %gep.b = getelementptr inbounds i16* %b, i64 %index
    494   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
    495   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
    496   %load.a = load <8 x i16>* %ptr.a, align 2
    497   %load.b = load <8 x i16>* %ptr.b, align 2
    498   %cmp = icmp uge <8 x i16> %load.a, %load.b  ; per-lane unsigned a >= b
    499   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b  ; take a where the compare holds, else b (maximum)
    500   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
    501   %index.next = add i64 %index, 8
    502   %loop = icmp eq i64 %index.next, 16384
    503   br i1 %loop, label %for.end, label %vector.body
    504 
    505 for.end:                                          ; preds = %vector.body
    506   ret void
    507 
    508 ; SSE4: test16:
    509 ; SSE4: pmaxuw
    510 
    511 ; AVX1: test16:
    512 ; AVX1: vpmaxuw
    513 
    514 ; AVX2: test16:
    515 ; AVX2: vpmaxuw
    516 }
    517 
    ; Signed i32 minimum written as icmp slt + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pminsd under SSE4 and vpminsd under AVX1 and AVX2.
    518 define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
    519 vector.ph:
    520   br label %vector.body
    521 
    522 vector.body:                                      ; preds = %vector.body, %vector.ph
    523   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    524   %gep.a = getelementptr inbounds i32* %a, i64 %index
    525   %gep.b = getelementptr inbounds i32* %b, i64 %index
    526   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    527   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    528   %load.a = load <4 x i32>* %ptr.a, align 2
    529   %load.b = load <4 x i32>* %ptr.b, align 2
    530   %cmp = icmp slt <4 x i32> %load.a, %load.b  ; per-lane signed a < b
    531   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (minimum)
    532   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    533   %index.next = add i64 %index, 4
    534   %loop = icmp eq i64 %index.next, 16384
    535   br i1 %loop, label %for.end, label %vector.body
    536 
    537 for.end:                                          ; preds = %vector.body
    538   ret void
    539 
    540 ; SSE4: test17:
    541 ; SSE4: pminsd
    542 
    543 ; AVX1: test17:
    544 ; AVX1: vpminsd
    545 
    546 ; AVX2: test17:
    547 ; AVX2: vpminsd
    548 }
    549 
    ; Signed i32 minimum written as icmp sle + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pminsd under SSE4 and vpminsd under AVX1 and AVX2.
    550 define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
    551 vector.ph:
    552   br label %vector.body
    553 
    554 vector.body:                                      ; preds = %vector.body, %vector.ph
    555   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    556   %gep.a = getelementptr inbounds i32* %a, i64 %index
    557   %gep.b = getelementptr inbounds i32* %b, i64 %index
    558   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    559   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    560   %load.a = load <4 x i32>* %ptr.a, align 2
    561   %load.b = load <4 x i32>* %ptr.b, align 2
    562   %cmp = icmp sle <4 x i32> %load.a, %load.b  ; per-lane signed a <= b
    563   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (minimum)
    564   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    565   %index.next = add i64 %index, 4
    566   %loop = icmp eq i64 %index.next, 16384
    567   br i1 %loop, label %for.end, label %vector.body
    568 
    569 for.end:                                          ; preds = %vector.body
    570   ret void
    571 
    572 ; SSE4: test18:
    573 ; SSE4: pminsd
    574 
    575 ; AVX1: test18:
    576 ; AVX1: vpminsd
    577 
    578 ; AVX2: test18:
    579 ; AVX2: vpminsd
    580 }
    581 
    ; Signed i32 maximum written as icmp sgt + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pmaxsd under SSE4 and vpmaxsd under AVX1 and AVX2.
    582 define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
    583 vector.ph:
    584   br label %vector.body
    585 
    586 vector.body:                                      ; preds = %vector.body, %vector.ph
    587   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    588   %gep.a = getelementptr inbounds i32* %a, i64 %index
    589   %gep.b = getelementptr inbounds i32* %b, i64 %index
    590   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    591   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    592   %load.a = load <4 x i32>* %ptr.a, align 2
    593   %load.b = load <4 x i32>* %ptr.b, align 2
    594   %cmp = icmp sgt <4 x i32> %load.a, %load.b  ; per-lane signed a > b
    595   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (maximum)
    596   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    597   %index.next = add i64 %index, 4
    598   %loop = icmp eq i64 %index.next, 16384
    599   br i1 %loop, label %for.end, label %vector.body
    600 
    601 for.end:                                          ; preds = %vector.body
    602   ret void
    603 
    604 ; SSE4: test19:
    605 ; SSE4: pmaxsd
    606 
    607 ; AVX1: test19:
    608 ; AVX1: vpmaxsd
    609 
    610 ; AVX2: test19:
    611 ; AVX2: vpmaxsd
    612 }
    613 
    ; Signed i32 maximum written as icmp sge + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pmaxsd under SSE4 and vpmaxsd under AVX1 and AVX2.
    614 define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
    615 vector.ph:
    616   br label %vector.body
    617 
    618 vector.body:                                      ; preds = %vector.body, %vector.ph
    619   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    620   %gep.a = getelementptr inbounds i32* %a, i64 %index
    621   %gep.b = getelementptr inbounds i32* %b, i64 %index
    622   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    623   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    624   %load.a = load <4 x i32>* %ptr.a, align 2
    625   %load.b = load <4 x i32>* %ptr.b, align 2
    626   %cmp = icmp sge <4 x i32> %load.a, %load.b  ; per-lane signed a >= b
    627   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (maximum)
    628   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    629   %index.next = add i64 %index, 4
    630   %loop = icmp eq i64 %index.next, 16384
    631   br i1 %loop, label %for.end, label %vector.body
    632 
    633 for.end:                                          ; preds = %vector.body
    634   ret void
    635 
    636 ; SSE4: test20:
    637 ; SSE4: pmaxsd
    638 
    639 ; AVX1: test20:
    640 ; AVX1: vpmaxsd
    641 
    642 ; AVX2: test20:
    643 ; AVX2: vpmaxsd
    644 }
    645 
    ; Unsigned i32 minimum written as icmp ult + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pminud under SSE4 and vpminud under AVX1 and AVX2.
    646 define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
    647 vector.ph:
    648   br label %vector.body
    649 
    650 vector.body:                                      ; preds = %vector.body, %vector.ph
    651   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    652   %gep.a = getelementptr inbounds i32* %a, i64 %index
    653   %gep.b = getelementptr inbounds i32* %b, i64 %index
    654   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    655   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    656   %load.a = load <4 x i32>* %ptr.a, align 2
    657   %load.b = load <4 x i32>* %ptr.b, align 2
    658   %cmp = icmp ult <4 x i32> %load.a, %load.b  ; per-lane unsigned a < b
    659   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (minimum)
    660   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    661   %index.next = add i64 %index, 4
    662   %loop = icmp eq i64 %index.next, 16384
    663   br i1 %loop, label %for.end, label %vector.body
    664 
    665 for.end:                                          ; preds = %vector.body
    666   ret void
    667 
    668 ; SSE4: test21:
    669 ; SSE4: pminud
    670 
    671 ; AVX1: test21:
    672 ; AVX1: vpminud
    673 
    674 ; AVX2: test21:
    675 ; AVX2: vpminud
    676 }
    677 
    ; Unsigned i32 minimum written as icmp ule + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pminud under SSE4 and vpminud under AVX1 and AVX2.
    678 define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
    679 vector.ph:
    680   br label %vector.body
    681 
    682 vector.body:                                      ; preds = %vector.body, %vector.ph
    683   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    684   %gep.a = getelementptr inbounds i32* %a, i64 %index
    685   %gep.b = getelementptr inbounds i32* %b, i64 %index
    686   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    687   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    688   %load.a = load <4 x i32>* %ptr.a, align 2
    689   %load.b = load <4 x i32>* %ptr.b, align 2
    690   %cmp = icmp ule <4 x i32> %load.a, %load.b  ; per-lane unsigned a <= b
    691   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (minimum)
    692   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    693   %index.next = add i64 %index, 4
    694   %loop = icmp eq i64 %index.next, 16384
    695   br i1 %loop, label %for.end, label %vector.body
    696 
    697 for.end:                                          ; preds = %vector.body
    698   ret void
    699 
    700 ; SSE4: test22:
    701 ; SSE4: pminud
    702 
    703 ; AVX1: test22:
    704 ; AVX1: vpminud
    705 
    706 ; AVX2: test22:
    707 ; AVX2: vpminud
    708 }
    709 
    ; Unsigned i32 maximum written as icmp ugt + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pmaxud under SSE4 and vpmaxud under AVX1 and AVX2.
    710 define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
    711 vector.ph:
    712   br label %vector.body
    713 
    714 vector.body:                                      ; preds = %vector.body, %vector.ph
    715   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    716   %gep.a = getelementptr inbounds i32* %a, i64 %index
    717   %gep.b = getelementptr inbounds i32* %b, i64 %index
    718   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    719   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    720   %load.a = load <4 x i32>* %ptr.a, align 2
    721   %load.b = load <4 x i32>* %ptr.b, align 2
    722   %cmp = icmp ugt <4 x i32> %load.a, %load.b  ; per-lane unsigned a > b
    723   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (maximum)
    724   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    725   %index.next = add i64 %index, 4
    726   %loop = icmp eq i64 %index.next, 16384
    727   br i1 %loop, label %for.end, label %vector.body
    728 
    729 for.end:                                          ; preds = %vector.body
    730   ret void
    731 
    732 ; SSE4: test23:
    733 ; SSE4: pmaxud
    734 
    735 ; AVX1: test23:
    736 ; AVX1: vpmaxud
    737 
    738 ; AVX2: test23:
    739 ; AVX2: vpmaxud
    740 }
    741 
    ; Unsigned i32 maximum written as icmp uge + select on <4 x i32> vectors loaded from %a and %b (align 2).
    ; Loops from element index 0 to 16384 in steps of 4, storing the result over %a; checks expect pmaxud under SSE4 and vpmaxud under AVX1 and AVX2.
    742 define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
    743 vector.ph:
    744   br label %vector.body
    745 
    746 vector.body:                                      ; preds = %vector.body, %vector.ph
    747   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    748   %gep.a = getelementptr inbounds i32* %a, i64 %index
    749   %gep.b = getelementptr inbounds i32* %b, i64 %index
    750   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
    751   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
    752   %load.a = load <4 x i32>* %ptr.a, align 2
    753   %load.b = load <4 x i32>* %ptr.b, align 2
    754   %cmp = icmp uge <4 x i32> %load.a, %load.b  ; per-lane unsigned a >= b
    755   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b  ; take a where the compare holds, else b (maximum)
    756   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
    757   %index.next = add i64 %index, 4
    758   %loop = icmp eq i64 %index.next, 16384
    759   br i1 %loop, label %for.end, label %vector.body
    760 
    761 for.end:                                          ; preds = %vector.body
    762   ret void
    763 
    764 ; SSE4: test24:
    765 ; SSE4: pmaxud
    766 
    767 ; AVX1: test24:
    768 ; AVX1: vpmaxud
    769 
    770 ; AVX2: test24:
    771 ; AVX2: vpmaxud
    772 }
    773 
    ; Signed i8 minimum written as icmp slt + select on 256-bit <32 x i8> vectors loaded from %a and %b (align 2).
    ; Loops from index 0 to 16384 in steps of 32, storing the result over %a; only the AVX2 prefix is checked here, expecting vpminsb.
    774 define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
    775 vector.ph:
    776   br label %vector.body
    777 
    778 vector.body:                                      ; preds = %vector.body, %vector.ph
    779   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    780   %gep.a = getelementptr inbounds i8* %a, i64 %index
    781   %gep.b = getelementptr inbounds i8* %b, i64 %index
    782   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    783   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    784   %load.a = load <32 x i8>* %ptr.a, align 2
    785   %load.b = load <32 x i8>* %ptr.b, align 2
    786   %cmp = icmp slt <32 x i8> %load.a, %load.b  ; per-lane signed a < b
    787   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b  ; take a where the compare holds, else b (minimum)
    788   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
    789   %index.next = add i64 %index, 32
    790   %loop = icmp eq i64 %index.next, 16384
    791   br i1 %loop, label %for.end, label %vector.body
    792 
    793 for.end:                                          ; preds = %vector.body
    794   ret void
    795 
    796 ; AVX2: test25:
    797 ; AVX2: vpminsb
    798 }
    799 
    800 define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind { ; signed minimum of <32 x i8> vectors, written with sle instead of slt; the AVX2 checks at the end expect vpminsb
    801 vector.ph:
    802   br label %vector.body
    803 
    804 vector.body:                                      ; preds = %vector.body, %vector.ph
    805   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
    806   %gep.a = getelementptr inbounds i8* %a, i64 %index
    807   %gep.b = getelementptr inbounds i8* %b, i64 %index
    808   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    809   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    810   %load.a = load <32 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    811   %load.b = load <32 x i8>* %ptr.b, align 2
    812   %cmp = icmp sle <32 x i8> %load.a, %load.b ; per-lane signed a <= b
    813   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b ; a where a <= b, otherwise b: the smaller value (equal lanes give the same value either way)
    814   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
    815   %index.next = add i64 %index, 32 ; advance by one vector (32 elements)
    816   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    817   br i1 %loop, label %for.end, label %vector.body
    818 
    819 for.end:                                          ; preds = %vector.body
    820   ret void
    821 
    822 ; AVX2: test26:
    823 ; AVX2: vpminsb
    824 }
    825 
    826 define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind { ; signed maximum of <32 x i8> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpmaxsb
    827 vector.ph:
    828   br label %vector.body
    829 
    830 vector.body:                                      ; preds = %vector.body, %vector.ph
    831   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
    832   %gep.a = getelementptr inbounds i8* %a, i64 %index
    833   %gep.b = getelementptr inbounds i8* %b, i64 %index
    834   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    835   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    836   %load.a = load <32 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    837   %load.b = load <32 x i8>* %ptr.b, align 2
    838   %cmp = icmp sgt <32 x i8> %load.a, %load.b ; per-lane signed a > b
    839   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b ; a where a > b, otherwise b: the larger value
    840   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
    841   %index.next = add i64 %index, 32 ; advance by one vector (32 elements)
    842   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    843   br i1 %loop, label %for.end, label %vector.body
    844 
    845 for.end:                                          ; preds = %vector.body
    846   ret void
    847 
    848 ; AVX2: test27:
    849 ; AVX2: vpmaxsb
    850 }
    851 
    852 define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind { ; signed maximum of <32 x i8> vectors, written with sge instead of sgt; the AVX2 checks at the end expect vpmaxsb
    853 vector.ph:
    854   br label %vector.body
    855 
    856 vector.body:                                      ; preds = %vector.body, %vector.ph
    857   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
    858   %gep.a = getelementptr inbounds i8* %a, i64 %index
    859   %gep.b = getelementptr inbounds i8* %b, i64 %index
    860   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    861   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    862   %load.a = load <32 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    863   %load.b = load <32 x i8>* %ptr.b, align 2
    864   %cmp = icmp sge <32 x i8> %load.a, %load.b ; per-lane signed a >= b
    865   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b ; a where a >= b, otherwise b: the larger value (equal lanes give the same value either way)
    866   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
    867   %index.next = add i64 %index, 32 ; advance by one vector (32 elements)
    868   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    869   br i1 %loop, label %for.end, label %vector.body
    870 
    871 for.end:                                          ; preds = %vector.body
    872   ret void
    873 
    874 ; AVX2: test28:
    875 ; AVX2: vpmaxsb
    876 }
    877 
    878 define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind { ; unsigned minimum of <32 x i8> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpminub
    879 vector.ph:
    880   br label %vector.body
    881 
    882 vector.body:                                      ; preds = %vector.body, %vector.ph
    883   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
    884   %gep.a = getelementptr inbounds i8* %a, i64 %index
    885   %gep.b = getelementptr inbounds i8* %b, i64 %index
    886   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    887   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    888   %load.a = load <32 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    889   %load.b = load <32 x i8>* %ptr.b, align 2
    890   %cmp = icmp ult <32 x i8> %load.a, %load.b ; per-lane unsigned a < b
    891   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b ; a where a < b, otherwise b: the smaller value
    892   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
    893   %index.next = add i64 %index, 32 ; advance by one vector (32 elements)
    894   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    895   br i1 %loop, label %for.end, label %vector.body
    896 
    897 for.end:                                          ; preds = %vector.body
    898   ret void
    899 
    900 ; AVX2: test29:
    901 ; AVX2: vpminub
    902 }
    903 
    904 define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind { ; unsigned minimum of <32 x i8> vectors, written with ule instead of ult; the AVX2 checks at the end expect vpminub
    905 vector.ph:
    906   br label %vector.body
    907 
    908 vector.body:                                      ; preds = %vector.body, %vector.ph
    909   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
    910   %gep.a = getelementptr inbounds i8* %a, i64 %index
    911   %gep.b = getelementptr inbounds i8* %b, i64 %index
    912   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    913   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    914   %load.a = load <32 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    915   %load.b = load <32 x i8>* %ptr.b, align 2
    916   %cmp = icmp ule <32 x i8> %load.a, %load.b ; per-lane unsigned a <= b
    917   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b ; a where a <= b, otherwise b: the smaller value (equal lanes give the same value either way)
    918   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
    919   %index.next = add i64 %index, 32 ; advance by one vector (32 elements)
    920   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    921   br i1 %loop, label %for.end, label %vector.body
    922 
    923 for.end:                                          ; preds = %vector.body
    924   ret void
    925 
    926 ; AVX2: test30:
    927 ; AVX2: vpminub
    928 }
    929 
    930 define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind { ; unsigned maximum of <32 x i8> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpmaxub
    931 vector.ph:
    932   br label %vector.body
    933 
    934 vector.body:                                      ; preds = %vector.body, %vector.ph
    935   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
    936   %gep.a = getelementptr inbounds i8* %a, i64 %index
    937   %gep.b = getelementptr inbounds i8* %b, i64 %index
    938   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    939   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    940   %load.a = load <32 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    941   %load.b = load <32 x i8>* %ptr.b, align 2
    942   %cmp = icmp ugt <32 x i8> %load.a, %load.b ; per-lane unsigned a > b
    943   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b ; a where a > b, otherwise b: the larger value
    944   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
    945   %index.next = add i64 %index, 32 ; advance by one vector (32 elements)
    946   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    947   br i1 %loop, label %for.end, label %vector.body
    948 
    949 for.end:                                          ; preds = %vector.body
    950   ret void
    951 
    952 ; AVX2: test31:
    953 ; AVX2: vpmaxub
    954 }
    955 
    956 define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind { ; unsigned maximum of <32 x i8> vectors, written with uge instead of ugt; the AVX2 checks at the end expect vpmaxub
    957 vector.ph:
    958   br label %vector.body
    959 
    960 vector.body:                                      ; preds = %vector.body, %vector.ph
    961   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
    962   %gep.a = getelementptr inbounds i8* %a, i64 %index
    963   %gep.b = getelementptr inbounds i8* %b, i64 %index
    964   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
    965   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
    966   %load.a = load <32 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    967   %load.b = load <32 x i8>* %ptr.b, align 2
    968   %cmp = icmp uge <32 x i8> %load.a, %load.b ; per-lane unsigned a >= b
    969   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b ; a where a >= b, otherwise b: the larger value (equal lanes give the same value either way)
    970   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
    971   %index.next = add i64 %index, 32 ; advance by one vector (32 elements)
    972   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    973   br i1 %loop, label %for.end, label %vector.body
    974 
    975 for.end:                                          ; preds = %vector.body
    976   ret void
    977 
    978 ; AVX2: test32:
    979 ; AVX2: vpmaxub
    980 }
    981 
    982 define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind { ; signed minimum of <16 x i16> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpminsw
    983 vector.ph:
    984   br label %vector.body
    985 
    986 vector.body:                                      ; preds = %vector.body, %vector.ph
    987   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
    988   %gep.a = getelementptr inbounds i16* %a, i64 %index
    989   %gep.b = getelementptr inbounds i16* %b, i64 %index
    990   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
    991   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
    992   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
    993   %load.b = load <16 x i16>* %ptr.b, align 2
    994   %cmp = icmp slt <16 x i16> %load.a, %load.b ; per-lane signed a < b
    995   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a < b, otherwise b: the smaller value
    996   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
    997   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
    998   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
    999   br i1 %loop, label %for.end, label %vector.body
   1000 
   1001 for.end:                                          ; preds = %vector.body
   1002   ret void
   1003 
   1004 ; AVX2: test33:
   1005 ; AVX2: vpminsw
   1006 }
   1007 
   1008 define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind { ; signed minimum of <16 x i16> vectors, written with sle instead of slt; the AVX2 checks at the end expect vpminsw
   1009 vector.ph:
   1010   br label %vector.body
   1011 
   1012 vector.body:                                      ; preds = %vector.body, %vector.ph
   1013   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
   1014   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1015   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1016   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   1017   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   1018   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1019   %load.b = load <16 x i16>* %ptr.b, align 2
   1020   %cmp = icmp sle <16 x i16> %load.a, %load.b ; per-lane signed a <= b
   1021   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a <= b, otherwise b: the smaller value (equal lanes give the same value either way)
   1022   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
   1023   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1024   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1025   br i1 %loop, label %for.end, label %vector.body
   1026 
   1027 for.end:                                          ; preds = %vector.body
   1028   ret void
   1029 
   1030 ; AVX2: test34:
   1031 ; AVX2: vpminsw
   1032 }
   1033 
   1034 define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind { ; signed maximum of <16 x i16> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpmaxsw
   1035 vector.ph:
   1036   br label %vector.body
   1037 
   1038 vector.body:                                      ; preds = %vector.body, %vector.ph
   1039   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
   1040   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1041   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1042   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   1043   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   1044   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1045   %load.b = load <16 x i16>* %ptr.b, align 2
   1046   %cmp = icmp sgt <16 x i16> %load.a, %load.b ; per-lane signed a > b
   1047   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a > b, otherwise b: the larger value
   1048   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
   1049   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1050   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1051   br i1 %loop, label %for.end, label %vector.body
   1052 
   1053 for.end:                                          ; preds = %vector.body
   1054   ret void
   1055 
   1056 ; AVX2: test35:
   1057 ; AVX2: vpmaxsw
   1058 }
   1059 
   1060 define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind { ; signed maximum of <16 x i16> vectors, written with sge instead of sgt; the AVX2 checks at the end expect vpmaxsw
   1061 vector.ph:
   1062   br label %vector.body
   1063 
   1064 vector.body:                                      ; preds = %vector.body, %vector.ph
   1065   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
   1066   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1067   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1068   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   1069   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   1070   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1071   %load.b = load <16 x i16>* %ptr.b, align 2
   1072   %cmp = icmp sge <16 x i16> %load.a, %load.b ; per-lane signed a >= b
   1073   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a >= b, otherwise b: the larger value (equal lanes give the same value either way)
   1074   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
   1075   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1076   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1077   br i1 %loop, label %for.end, label %vector.body
   1078 
   1079 for.end:                                          ; preds = %vector.body
   1080   ret void
   1081 
   1082 ; AVX2: test36:
   1083 ; AVX2: vpmaxsw
   1084 }
   1085 
   1086 define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind { ; unsigned minimum of <16 x i16> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpminuw
   1087 vector.ph:
   1088   br label %vector.body
   1089 
   1090 vector.body:                                      ; preds = %vector.body, %vector.ph
   1091   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
   1092   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1093   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1094   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   1095   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   1096   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1097   %load.b = load <16 x i16>* %ptr.b, align 2
   1098   %cmp = icmp ult <16 x i16> %load.a, %load.b ; per-lane unsigned a < b
   1099   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a < b, otherwise b: the smaller value
   1100   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
   1101   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1102   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1103   br i1 %loop, label %for.end, label %vector.body
   1104 
   1105 for.end:                                          ; preds = %vector.body
   1106   ret void
   1107 
   1108 ; AVX2: test37:
   1109 ; AVX2: vpminuw
   1110 }
   1111 
   1112 define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind { ; unsigned minimum of <16 x i16> vectors, written with ule instead of ult; the AVX2 checks at the end expect vpminuw
   1113 vector.ph:
   1114   br label %vector.body
   1115 
   1116 vector.body:                                      ; preds = %vector.body, %vector.ph
   1117   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
   1118   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1119   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1120   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   1121   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   1122   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1123   %load.b = load <16 x i16>* %ptr.b, align 2
   1124   %cmp = icmp ule <16 x i16> %load.a, %load.b ; per-lane unsigned a <= b
   1125   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a <= b, otherwise b: the smaller value (equal lanes give the same value either way)
   1126   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
   1127   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1128   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1129   br i1 %loop, label %for.end, label %vector.body
   1130 
   1131 for.end:                                          ; preds = %vector.body
   1132   ret void
   1133 
   1134 ; AVX2: test38:
   1135 ; AVX2: vpminuw
   1136 }
   1137 
   1138 define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind { ; unsigned maximum of <16 x i16> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpmaxuw
   1139 vector.ph:
   1140   br label %vector.body
   1141 
   1142 vector.body:                                      ; preds = %vector.body, %vector.ph
   1143   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
   1144   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1145   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1146   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   1147   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   1148   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1149   %load.b = load <16 x i16>* %ptr.b, align 2
   1150   %cmp = icmp ugt <16 x i16> %load.a, %load.b ; per-lane unsigned a > b
   1151   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a > b, otherwise b: the larger value
   1152   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
   1153   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1154   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1155   br i1 %loop, label %for.end, label %vector.body
   1156 
   1157 for.end:                                          ; preds = %vector.body
   1158   ret void
   1159 
   1160 ; AVX2: test39:
   1161 ; AVX2: vpmaxuw
   1162 }
   1163 
   1164 define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind { ; unsigned maximum of <16 x i16> vectors, written with uge instead of ugt; the AVX2 checks at the end expect vpmaxuw
   1165 vector.ph:
   1166   br label %vector.body
   1167 
   1168 vector.body:                                      ; preds = %vector.body, %vector.ph
   1169   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i16 element offset into %a and %b, starts at 0
   1170   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1171   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1172   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   1173   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   1174   %load.a = load <16 x i16>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1175   %load.b = load <16 x i16>* %ptr.b, align 2
   1176   %cmp = icmp uge <16 x i16> %load.a, %load.b ; per-lane unsigned a >= b
   1177   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b ; a where a >= b, otherwise b: the larger value (equal lanes give the same value either way)
   1178   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; result replaces the vector read from %a
   1179   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1180   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1181   br i1 %loop, label %for.end, label %vector.body
   1182 
   1183 for.end:                                          ; preds = %vector.body
   1184   ret void
   1185 
   1186 ; AVX2: test40:
   1187 ; AVX2: vpmaxuw
   1188 }
   1189 
   1190 define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind { ; signed minimum of <8 x i32> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpminsd
   1191 vector.ph:
   1192   br label %vector.body
   1193 
   1194 vector.body:                                      ; preds = %vector.body, %vector.ph
   1195   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1196   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1197   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1198   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1199   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1200   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1201   %load.b = load <8 x i32>* %ptr.b, align 2
   1202   %cmp = icmp slt <8 x i32> %load.a, %load.b ; per-lane signed a < b
   1203   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a < b, otherwise b: the smaller value
   1204   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1205   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1206   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1207   br i1 %loop, label %for.end, label %vector.body
   1208 
   1209 for.end:                                          ; preds = %vector.body
   1210   ret void
   1211 
   1212 ; AVX2: test41:
   1213 ; AVX2: vpminsd
   1214 }
   1215 
   1216 define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind { ; signed minimum of <8 x i32> vectors, written with sle instead of slt; the AVX2 checks at the end expect vpminsd
   1217 vector.ph:
   1218   br label %vector.body
   1219 
   1220 vector.body:                                      ; preds = %vector.body, %vector.ph
   1221   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1222   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1223   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1224   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1225   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1226   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1227   %load.b = load <8 x i32>* %ptr.b, align 2
   1228   %cmp = icmp sle <8 x i32> %load.a, %load.b ; per-lane signed a <= b
   1229   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a <= b, otherwise b: the smaller value (equal lanes give the same value either way)
   1230   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1231   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1232   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1233   br i1 %loop, label %for.end, label %vector.body
   1234 
   1235 for.end:                                          ; preds = %vector.body
   1236   ret void
   1237 
   1238 ; AVX2: test42:
   1239 ; AVX2: vpminsd
   1240 }
   1241 
   1242 define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind { ; signed maximum of <8 x i32> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpmaxsd
   1243 vector.ph:
   1244   br label %vector.body
   1245 
   1246 vector.body:                                      ; preds = %vector.body, %vector.ph
   1247   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1248   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1249   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1250   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1251   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1252   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1253   %load.b = load <8 x i32>* %ptr.b, align 2
   1254   %cmp = icmp sgt <8 x i32> %load.a, %load.b ; per-lane signed a > b
   1255   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a > b, otherwise b: the larger value
   1256   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1257   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1258   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1259   br i1 %loop, label %for.end, label %vector.body
   1260 
   1261 for.end:                                          ; preds = %vector.body
   1262   ret void
   1263 
   1264 ; AVX2: test43:
   1265 ; AVX2: vpmaxsd
   1266 }
   1267 
   1268 define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind { ; signed maximum of <8 x i32> vectors, written with sge instead of sgt; the AVX2 checks at the end expect vpmaxsd
   1269 vector.ph:
   1270   br label %vector.body
   1271 
   1272 vector.body:                                      ; preds = %vector.body, %vector.ph
   1273   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1274   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1275   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1276   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1277   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1278   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1279   %load.b = load <8 x i32>* %ptr.b, align 2
   1280   %cmp = icmp sge <8 x i32> %load.a, %load.b ; per-lane signed a >= b
   1281   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a >= b, otherwise b: the larger value (equal lanes give the same value either way)
   1282   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1283   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1284   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1285   br i1 %loop, label %for.end, label %vector.body
   1286 
   1287 for.end:                                          ; preds = %vector.body
   1288   ret void
   1289 
   1290 ; AVX2: test44:
   1291 ; AVX2: vpmaxsd
   1292 }
   1293 
   1294 define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind { ; unsigned minimum of <8 x i32> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpminud
   1295 vector.ph:
   1296   br label %vector.body
   1297 
   1298 vector.body:                                      ; preds = %vector.body, %vector.ph
   1299   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1300   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1301   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1302   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1303   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1304   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1305   %load.b = load <8 x i32>* %ptr.b, align 2
   1306   %cmp = icmp ult <8 x i32> %load.a, %load.b ; per-lane unsigned a < b
   1307   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a < b, otherwise b: the smaller value
   1308   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1309   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1310   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1311   br i1 %loop, label %for.end, label %vector.body
   1312 
   1313 for.end:                                          ; preds = %vector.body
   1314   ret void
   1315 
   1316 ; AVX2: test45:
   1317 ; AVX2: vpminud
   1318 }
   1319 
   1320 define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind { ; unsigned minimum of <8 x i32> vectors, written with ule instead of ult; the AVX2 checks at the end expect vpminud
   1321 vector.ph:
   1322   br label %vector.body
   1323 
   1324 vector.body:                                      ; preds = %vector.body, %vector.ph
   1325   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1326   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1327   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1328   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1329   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1330   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1331   %load.b = load <8 x i32>* %ptr.b, align 2
   1332   %cmp = icmp ule <8 x i32> %load.a, %load.b ; per-lane unsigned a <= b
   1333   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a <= b, otherwise b: the smaller value (equal lanes give the same value either way)
   1334   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1335   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1336   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1337   br i1 %loop, label %for.end, label %vector.body
   1338 
   1339 for.end:                                          ; preds = %vector.body
   1340   ret void
   1341 
   1342 ; AVX2: test46:
   1343 ; AVX2: vpminud
   1344 }
   1345 
   1346 define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind { ; unsigned maximum of <8 x i32> vectors from %a and %b, stored back to %a; the AVX2 checks at the end expect vpmaxud
   1347 vector.ph:
   1348   br label %vector.body
   1349 
   1350 vector.body:                                      ; preds = %vector.body, %vector.ph
   1351   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1352   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1353   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1354   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1355   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1356   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1357   %load.b = load <8 x i32>* %ptr.b, align 2
   1358   %cmp = icmp ugt <8 x i32> %load.a, %load.b ; per-lane unsigned a > b
   1359   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a > b, otherwise b: the larger value
   1360   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1361   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1362   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1363   br i1 %loop, label %for.end, label %vector.body
   1364 
   1365 for.end:                                          ; preds = %vector.body
   1366   ret void
   1367 
   1368 ; AVX2: test47:
   1369 ; AVX2: vpmaxud
   1370 }
   1371 
   1372 define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind { ; unsigned maximum of <8 x i32> vectors, written with uge instead of ugt; the AVX2 checks at the end expect vpmaxud
   1373 vector.ph:
   1374   br label %vector.body
   1375 
   1376 vector.body:                                      ; preds = %vector.body, %vector.ph
   1377   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; i32 element offset into %a and %b, starts at 0
   1378   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1379   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1380   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   1381   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   1382   %load.a = load <8 x i32>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 32-byte vector
   1383   %load.b = load <8 x i32>* %ptr.b, align 2
   1384   %cmp = icmp uge <8 x i32> %load.a, %load.b ; per-lane unsigned a >= b
   1385   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b ; a where a >= b, otherwise b: the larger value (equal lanes give the same value either way)
   1386   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; result replaces the vector read from %a
   1387   %index.next = add i64 %index, 8 ; advance by one vector (8 elements)
   1388   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1389   br i1 %loop, label %for.end, label %vector.body
   1390 
   1391 for.end:                                          ; preds = %vector.body
   1392   ret void
   1393 
   1394 ; AVX2: test48:
   1395 ; AVX2: vpmaxud
   1396 }
   1397 
   1398 define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind { ; signed maximum of <16 x i8> vectors expressed as icmp slt with the select operands swapped; the checks at the end expect pmaxsb / vpmaxsb
   1399 vector.ph:
   1400   br label %vector.body
   1401 
   1402 vector.body:                                      ; preds = %vector.body, %vector.ph
   1403   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
   1404   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1405   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1406   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1407   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1408   %load.a = load <16 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 16-byte vector
   1409   %load.b = load <16 x i8>* %ptr.b, align 2
   1410   %cmp = icmp slt <16 x i8> %load.a, %load.b ; per-lane signed a < b
   1411   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a ; b where a < b, otherwise a: the larger value (operands are b, a)
   1412   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
   1413   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1414   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1415   br i1 %loop, label %for.end, label %vector.body
   1416 
   1417 for.end:                                          ; preds = %vector.body
   1418   ret void
   1419 
   1420 ; SSE4: test49:
   1421 ; SSE4: pmaxsb
   1422 
   1423 ; AVX1: test49:
   1424 ; AVX1: vpmaxsb
   1425 
   1426 ; AVX2: test49:
   1427 ; AVX2: vpmaxsb
   1428 }
   1429 
   1430 define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind { ; signed maximum of <16 x i8> vectors expressed as icmp sle with the select operands swapped; the checks at the end expect pmaxsb / vpmaxsb
   1431 vector.ph:
   1432   br label %vector.body
   1433 
   1434 vector.body:                                      ; preds = %vector.body, %vector.ph
   1435   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
   1436   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1437   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1438   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1439   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1440   %load.a = load <16 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 16-byte vector
   1441   %load.b = load <16 x i8>* %ptr.b, align 2
   1442   %cmp = icmp sle <16 x i8> %load.a, %load.b ; per-lane signed a <= b
   1443   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a ; b where a <= b, otherwise a: the larger value (operands are b, a)
   1444   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
   1445   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1446   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1447   br i1 %loop, label %for.end, label %vector.body
   1448 
   1449 for.end:                                          ; preds = %vector.body
   1450   ret void
   1451 
   1452 ; SSE4: test50:
   1453 ; SSE4: pmaxsb
   1454 
   1455 ; AVX1: test50:
   1456 ; AVX1: vpmaxsb
   1457 
   1458 ; AVX2: test50:
   1459 ; AVX2: vpmaxsb
   1460 }
   1461 
   1462 define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind { ; signed minimum of <16 x i8> vectors expressed as icmp sgt with the select operands swapped; the checks at the end expect pminsb / vpminsb
   1463 vector.ph:
   1464   br label %vector.body
   1465 
   1466 vector.body:                                      ; preds = %vector.body, %vector.ph
   1467   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
   1468   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1469   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1470   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1471   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1472   %load.a = load <16 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 16-byte vector
   1473   %load.b = load <16 x i8>* %ptr.b, align 2
   1474   %cmp = icmp sgt <16 x i8> %load.a, %load.b ; per-lane signed a > b
   1475   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a ; b where a > b, otherwise a: the smaller value (operands are b, a)
   1476   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
   1477   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1478   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1479   br i1 %loop, label %for.end, label %vector.body
   1480 
   1481 for.end:                                          ; preds = %vector.body
   1482   ret void
   1483 
   1484 ; SSE4: test51:
   1485 ; SSE4: pminsb
   1486 
   1487 ; AVX1: test51:
   1488 ; AVX1: vpminsb
   1489 
   1490 ; AVX2: test51:
   1491 ; AVX2: vpminsb
   1492 }
   1493 
   1494 define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind { ; signed minimum of <16 x i8> vectors expressed as icmp sge with the select operands swapped; the checks at the end expect pminsb / vpminsb
   1495 vector.ph:
   1496   br label %vector.body
   1497 
   1498 vector.body:                                      ; preds = %vector.body, %vector.ph
   1499   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
   1500   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1501   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1502   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1503   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1504   %load.a = load <16 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 16-byte vector
   1505   %load.b = load <16 x i8>* %ptr.b, align 2
   1506   %cmp = icmp sge <16 x i8> %load.a, %load.b ; per-lane signed a >= b
   1507   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a ; b where a >= b, otherwise a: the smaller value (operands are b, a)
   1508   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
   1509   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1510   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1511   br i1 %loop, label %for.end, label %vector.body
   1512 
   1513 for.end:                                          ; preds = %vector.body
   1514   ret void
   1515 
   1516 ; SSE4: test52:
   1517 ; SSE4: pminsb
   1518 
   1519 ; AVX1: test52:
   1520 ; AVX1: vpminsb
   1521 
   1522 ; AVX2: test52:
   1523 ; AVX2: vpminsb
   1524 }
   1525 
   1526 define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind { ; unsigned maximum of <16 x i8> vectors expressed as icmp ult with the select operands swapped; the checks at the end expect pmaxub / vpmaxub
   1527 vector.ph:
   1528   br label %vector.body
   1529 
   1530 vector.body:                                      ; preds = %vector.body, %vector.ph
   1531   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element offset into %a and %b, starts at 0
   1532   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1533   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1534   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1535   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1536   %load.a = load <16 x i8>* %ptr.a, align 2 ; declared alignment is 2 bytes, smaller than the 16-byte vector
   1537   %load.b = load <16 x i8>* %ptr.b, align 2
   1538   %cmp = icmp ult <16 x i8> %load.a, %load.b ; per-lane unsigned a < b
   1539   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a ; b where a < b, otherwise a: the larger value (operands are b, a)
   1540   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 ; result replaces the vector read from %a
   1541   %index.next = add i64 %index, 16 ; advance by one vector (16 elements)
   1542   %loop = icmp eq i64 %index.next, 16384 ; leave the loop once the offset reaches 16384
   1543   br i1 %loop, label %for.end, label %vector.body
   1544 
   1545 for.end:                                          ; preds = %vector.body
   1546   ret void
   1547 
   1548 ; SSE2: test53:
   1549 ; SSE2: pmaxub
   1550 
   1551 ; AVX1: test53:
   1552 ; AVX1: vpmaxub
   1553 
   1554 ; AVX2: test53:
   1555 ; AVX2: vpmaxub
   1556 }
   1557 
   1558 define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on unsigned i8 lanes, one <16 x i8> vector per iteration
   1559 vector.ph:
   1560   br label %vector.body
   1561 
   1562 vector.body:                                      ; preds = %vector.body, %vector.ph
   1563   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1564   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1565   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1566   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1567   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1568   %load.a = load <16 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1569   %load.b = load <16 x i8>* %ptr.b, align 2
   1570   %cmp = icmp ule <16 x i8> %load.a, %load.b  ; lane-wise unsigned a <= b
   1571   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a  ; b where a <= b, otherwise a -> the larger lane
   1572   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1573   %index.next = add i64 %index, 16  ; advance by the 16 lanes just processed
   1574   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1575   br i1 %loop, label %for.end, label %vector.body
   1576 
   1577 for.end:                                          ; preds = %vector.body
   1578   ret void
   1579 ; Directives below, per prefix, match the text "test54:" and then the expected instruction; the SSE4 prefix has none here.
   1580 ; SSE2: test54:
   1581 ; SSE2: pmaxub
   1582 
   1583 ; AVX1: test54:
   1584 ; AVX1: vpmaxub
   1585 
   1586 ; AVX2: test54:
   1587 ; AVX2: vpmaxub
   1588 }
   1589 
   1590 define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on unsigned i8 lanes, one <16 x i8> vector per iteration
   1591 vector.ph:
   1592   br label %vector.body
   1593 
   1594 vector.body:                                      ; preds = %vector.body, %vector.ph
   1595   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1596   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1597   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1598   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1599   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1600   %load.a = load <16 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1601   %load.b = load <16 x i8>* %ptr.b, align 2
   1602   %cmp = icmp ugt <16 x i8> %load.a, %load.b  ; lane-wise unsigned a > b
   1603   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a  ; b where a > b, otherwise a -> the smaller lane
   1604   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1605   %index.next = add i64 %index, 16  ; advance by the 16 lanes just processed
   1606   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1607   br i1 %loop, label %for.end, label %vector.body
   1608 
   1609 for.end:                                          ; preds = %vector.body
   1610   ret void
   1611 ; Directives below, per prefix, match the text "test55:" and then the expected instruction; the SSE4 prefix has none here.
   1612 ; SSE2: test55:
   1613 ; SSE2: pminub
   1614 
   1615 ; AVX1: test55:
   1616 ; AVX1: vpminub
   1617 
   1618 ; AVX2: test55:
   1619 ; AVX2: vpminub
   1620 }
   1621 
   1622 define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on unsigned i8 lanes, one <16 x i8> vector per iteration
   1623 vector.ph:
   1624   br label %vector.body
   1625 
   1626 vector.body:                                      ; preds = %vector.body, %vector.ph
   1627   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1628   %gep.a = getelementptr inbounds i8* %a, i64 %index
   1629   %gep.b = getelementptr inbounds i8* %b, i64 %index
   1630   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   1631   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
   1632   %load.a = load <16 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1633   %load.b = load <16 x i8>* %ptr.b, align 2
   1634   %cmp = icmp uge <16 x i8> %load.a, %load.b  ; lane-wise unsigned a >= b
   1635   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a  ; b where a >= b, otherwise a -> the smaller lane
   1636   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1637   %index.next = add i64 %index, 16  ; advance by the 16 lanes just processed
   1638   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1639   br i1 %loop, label %for.end, label %vector.body
   1640 
   1641 for.end:                                          ; preds = %vector.body
   1642   ret void
   1643 ; Directives below, per prefix, match the text "test56:" and then the expected instruction; the SSE4 prefix has none here.
   1644 ; SSE2: test56:
   1645 ; SSE2: pminub
   1646 
   1647 ; AVX1: test56:
   1648 ; AVX1: vpminub
   1649 
   1650 ; AVX2: test56:
   1651 ; AVX2: vpminub
   1652 }
   1653 
   1654 define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on signed i16 lanes, one <8 x i16> vector per iteration
   1655 vector.ph:
   1656   br label %vector.body
   1657 
   1658 vector.body:                                      ; preds = %vector.body, %vector.ph
   1659   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1660   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1661   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1662   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1663   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1664   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1665   %load.b = load <8 x i16>* %ptr.b, align 2
   1666   %cmp = icmp slt <8 x i16> %load.a, %load.b  ; lane-wise signed a < b
   1667   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a < b, otherwise a -> the larger lane
   1668   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1669   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1670   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1671   br i1 %loop, label %for.end, label %vector.body
   1672 
   1673 for.end:                                          ; preds = %vector.body
   1674   ret void
   1675 ; Directives below, per prefix, match the text "test57:" and then the expected instruction; the SSE4 prefix has none here.
   1676 ; SSE2: test57:
   1677 ; SSE2: pmaxsw
   1678 
   1679 ; AVX1: test57:
   1680 ; AVX1: vpmaxsw
   1681 
   1682 ; AVX2: test57:
   1683 ; AVX2: vpmaxsw
   1684 }
   1685 
   1686 define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on signed i16 lanes, one <8 x i16> vector per iteration
   1687 vector.ph:
   1688   br label %vector.body
   1689 
   1690 vector.body:                                      ; preds = %vector.body, %vector.ph
   1691   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1692   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1693   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1694   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1695   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1696   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1697   %load.b = load <8 x i16>* %ptr.b, align 2
   1698   %cmp = icmp sle <8 x i16> %load.a, %load.b  ; lane-wise signed a <= b
   1699   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a <= b, otherwise a -> the larger lane
   1700   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1701   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1702   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1703   br i1 %loop, label %for.end, label %vector.body
   1704 
   1705 for.end:                                          ; preds = %vector.body
   1706   ret void
   1707 ; Directives below, per prefix, match the text "test58:" and then the expected instruction; the SSE4 prefix has none here.
   1708 ; SSE2: test58:
   1709 ; SSE2: pmaxsw
   1710 
   1711 ; AVX1: test58:
   1712 ; AVX1: vpmaxsw
   1713 
   1714 ; AVX2: test58:
   1715 ; AVX2: vpmaxsw
   1716 }
   1717 
   1718 define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on signed i16 lanes, one <8 x i16> vector per iteration
   1719 vector.ph:
   1720   br label %vector.body
   1721 
   1722 vector.body:                                      ; preds = %vector.body, %vector.ph
   1723   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1724   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1725   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1726   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1727   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1728   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1729   %load.b = load <8 x i16>* %ptr.b, align 2
   1730   %cmp = icmp sgt <8 x i16> %load.a, %load.b  ; lane-wise signed a > b
   1731   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a > b, otherwise a -> the smaller lane
   1732   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1733   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1734   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1735   br i1 %loop, label %for.end, label %vector.body
   1736 
   1737 for.end:                                          ; preds = %vector.body
   1738   ret void
   1739 ; Directives below, per prefix, match the text "test59:" and then the expected instruction; the SSE4 prefix has none here.
   1740 ; SSE2: test59:
   1741 ; SSE2: pminsw
   1742 
   1743 ; AVX1: test59:
   1744 ; AVX1: vpminsw
   1745 
   1746 ; AVX2: test59:
   1747 ; AVX2: vpminsw
   1748 }
   1749 
   1750 define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on signed i16 lanes, one <8 x i16> vector per iteration
   1751 vector.ph:
   1752   br label %vector.body
   1753 
   1754 vector.body:                                      ; preds = %vector.body, %vector.ph
   1755   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1756   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1757   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1758   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1759   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1760   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1761   %load.b = load <8 x i16>* %ptr.b, align 2
   1762   %cmp = icmp sge <8 x i16> %load.a, %load.b  ; lane-wise signed a >= b
   1763   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a >= b, otherwise a -> the smaller lane
   1764   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1765   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1766   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1767   br i1 %loop, label %for.end, label %vector.body
   1768 
   1769 for.end:                                          ; preds = %vector.body
   1770   ret void
   1771 ; Directives below, per prefix, match the text "test60:" and then the expected instruction; the SSE4 prefix has none here.
   1772 ; SSE2: test60:
   1773 ; SSE2: pminsw
   1774 
   1775 ; AVX1: test60:
   1776 ; AVX1: vpminsw
   1777 
   1778 ; AVX2: test60:
   1779 ; AVX2: vpminsw
   1780 }
   1781 
   1782 define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on unsigned i16 lanes, one <8 x i16> vector per iteration
   1783 vector.ph:
   1784   br label %vector.body
   1785 
   1786 vector.body:                                      ; preds = %vector.body, %vector.ph
   1787   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1788   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1789   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1790   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1791   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1792   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1793   %load.b = load <8 x i16>* %ptr.b, align 2
   1794   %cmp = icmp ult <8 x i16> %load.a, %load.b  ; lane-wise unsigned a < b
   1795   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a < b, otherwise a -> the larger lane
   1796   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1797   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1798   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1799   br i1 %loop, label %for.end, label %vector.body
   1800 
   1801 for.end:                                          ; preds = %vector.body
   1802   ret void
   1803 ; Directives below, per prefix, match the text "test61:" and then the expected instruction; the SSE2 prefix has none here.
   1804 ; SSE4: test61:
   1805 ; SSE4: pmaxuw
   1806 
   1807 ; AVX1: test61:
   1808 ; AVX1: vpmaxuw
   1809 
   1810 ; AVX2: test61:
   1811 ; AVX2: vpmaxuw
   1812 }
   1813 
   1814 define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on unsigned i16 lanes, one <8 x i16> vector per iteration
   1815 vector.ph:
   1816   br label %vector.body
   1817 
   1818 vector.body:                                      ; preds = %vector.body, %vector.ph
   1819   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1820   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1821   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1822   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1823   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1824   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1825   %load.b = load <8 x i16>* %ptr.b, align 2
   1826   %cmp = icmp ule <8 x i16> %load.a, %load.b  ; lane-wise unsigned a <= b
   1827   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a <= b, otherwise a -> the larger lane
   1828   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1829   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1830   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1831   br i1 %loop, label %for.end, label %vector.body
   1832 
   1833 for.end:                                          ; preds = %vector.body
   1834   ret void
   1835 ; Directives below, per prefix, match the text "test62:" and then the expected instruction; the SSE2 prefix has none here.
   1836 ; SSE4: test62:
   1837 ; SSE4: pmaxuw
   1838 
   1839 ; AVX1: test62:
   1840 ; AVX1: vpmaxuw
   1841 
   1842 ; AVX2: test62:
   1843 ; AVX2: vpmaxuw
   1844 }
   1845 
   1846 define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on unsigned i16 lanes, one <8 x i16> vector per iteration
   1847 vector.ph:
   1848   br label %vector.body
   1849 
   1850 vector.body:                                      ; preds = %vector.body, %vector.ph
   1851   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1852   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1853   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1854   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1855   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1856   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1857   %load.b = load <8 x i16>* %ptr.b, align 2
   1858   %cmp = icmp ugt <8 x i16> %load.a, %load.b  ; lane-wise unsigned a > b
   1859   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a > b, otherwise a -> the smaller lane
   1860   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1861   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1862   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1863   br i1 %loop, label %for.end, label %vector.body
   1864 
   1865 for.end:                                          ; preds = %vector.body
   1866   ret void
   1867 ; Directives below, per prefix, match the text "test63:" and then the expected instruction; the SSE2 prefix has none here.
   1868 ; SSE4: test63:
   1869 ; SSE4: pminuw
   1870 
   1871 ; AVX1: test63:
   1872 ; AVX1: vpminuw
   1873 
   1874 ; AVX2: test63:
   1875 ; AVX2: vpminuw
   1876 }
   1877 
   1878 define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on unsigned i16 lanes, one <8 x i16> vector per iteration
   1879 vector.ph:
   1880   br label %vector.body
   1881 
   1882 vector.body:                                      ; preds = %vector.body, %vector.ph
   1883   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1884   %gep.a = getelementptr inbounds i16* %a, i64 %index
   1885   %gep.b = getelementptr inbounds i16* %b, i64 %index
   1886   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   1887   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
   1888   %load.a = load <8 x i16>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1889   %load.b = load <8 x i16>* %ptr.b, align 2
   1890   %cmp = icmp uge <8 x i16> %load.a, %load.b  ; lane-wise unsigned a >= b
   1891   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a  ; b where a >= b, otherwise a -> the smaller lane
   1892   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1893   %index.next = add i64 %index, 8  ; advance by the 8 lanes just processed
   1894   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1895   br i1 %loop, label %for.end, label %vector.body
   1896 
   1897 for.end:                                          ; preds = %vector.body
   1898   ret void
   1899 ; Directives below, per prefix, match the text "test64:" and then the expected instruction; the SSE2 prefix has none here.
   1900 ; SSE4: test64:
   1901 ; SSE4: pminuw
   1902 
   1903 ; AVX1: test64:
   1904 ; AVX1: vpminuw
   1905 
   1906 ; AVX2: test64:
   1907 ; AVX2: vpminuw
   1908 }
   1909 
   1910 define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on signed i32 lanes, one <4 x i32> vector per iteration
   1911 vector.ph:
   1912   br label %vector.body
   1913 
   1914 vector.body:                                      ; preds = %vector.body, %vector.ph
   1915   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1916   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1917   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1918   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   1919   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   1920   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1921   %load.b = load <4 x i32>* %ptr.b, align 2
   1922   %cmp = icmp slt <4 x i32> %load.a, %load.b  ; lane-wise signed a < b
   1923   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a < b, otherwise a -> the larger lane
   1924   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1925   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   1926   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1927   br i1 %loop, label %for.end, label %vector.body
   1928 
   1929 for.end:                                          ; preds = %vector.body
   1930   ret void
   1931 ; Directives below, per prefix, match the text "test65:" and then the expected instruction; the SSE2 prefix has none here.
   1932 ; SSE4: test65:
   1933 ; SSE4: pmaxsd
   1934 
   1935 ; AVX1: test65:
   1936 ; AVX1: vpmaxsd
   1937 
   1938 ; AVX2: test65:
   1939 ; AVX2: vpmaxsd
   1940 }
   1941 
   1942 define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on signed i32 lanes, one <4 x i32> vector per iteration
   1943 vector.ph:
   1944   br label %vector.body
   1945 
   1946 vector.body:                                      ; preds = %vector.body, %vector.ph
   1947   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1948   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1949   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1950   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   1951   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   1952   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1953   %load.b = load <4 x i32>* %ptr.b, align 2
   1954   %cmp = icmp sle <4 x i32> %load.a, %load.b  ; lane-wise signed a <= b
   1955   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a <= b, otherwise a -> the larger lane
   1956   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1957   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   1958   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1959   br i1 %loop, label %for.end, label %vector.body
   1960 
   1961 for.end:                                          ; preds = %vector.body
   1962   ret void
   1963 ; Directives below, per prefix, match the text "test66:" and then the expected instruction; the SSE2 prefix has none here.
   1964 ; SSE4: test66:
   1965 ; SSE4: pmaxsd
   1966 
   1967 ; AVX1: test66:
   1968 ; AVX1: vpmaxsd
   1969 
   1970 ; AVX2: test66:
   1971 ; AVX2: vpmaxsd
   1972 }
   1973 
   1974 define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on signed i32 lanes, one <4 x i32> vector per iteration
   1975 vector.ph:
   1976   br label %vector.body
   1977 
   1978 vector.body:                                      ; preds = %vector.body, %vector.ph
   1979   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   1980   %gep.a = getelementptr inbounds i32* %a, i64 %index
   1981   %gep.b = getelementptr inbounds i32* %b, i64 %index
   1982   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   1983   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   1984   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   1985   %load.b = load <4 x i32>* %ptr.b, align 2
   1986   %cmp = icmp sgt <4 x i32> %load.a, %load.b  ; lane-wise signed a > b
   1987   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a > b, otherwise a -> the smaller lane
   1988   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   1989   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   1990   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   1991   br i1 %loop, label %for.end, label %vector.body
   1992 
   1993 for.end:                                          ; preds = %vector.body
   1994   ret void
   1995 ; Directives below, per prefix, match the text "test67:" and then the expected instruction; the SSE2 prefix has none here.
   1996 ; SSE4: test67:
   1997 ; SSE4: pminsd
   1998 
   1999 ; AVX1: test67:
   2000 ; AVX1: vpminsd
   2001 
   2002 ; AVX2: test67:
   2003 ; AVX2: vpminsd
   2004 }
   2005 
   2006 define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on signed i32 lanes, one <4 x i32> vector per iteration
   2007 vector.ph:
   2008   br label %vector.body
   2009 
   2010 vector.body:                                      ; preds = %vector.body, %vector.ph
   2011   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2012   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2013   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2014   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   2015   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   2016   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   2017   %load.b = load <4 x i32>* %ptr.b, align 2
   2018   %cmp = icmp sge <4 x i32> %load.a, %load.b  ; lane-wise signed a >= b
   2019   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a >= b, otherwise a -> the smaller lane
   2020   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2021   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   2022   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2023   br i1 %loop, label %for.end, label %vector.body
   2024 
   2025 for.end:                                          ; preds = %vector.body
   2026   ret void
   2027 ; Directives below, per prefix, match the text "test68:" and then the expected instruction; the SSE2 prefix has none here.
   2028 ; SSE4: test68:
   2029 ; SSE4: pminsd
   2030 
   2031 ; AVX1: test68:
   2032 ; AVX1: vpminsd
   2033 
   2034 ; AVX2: test68:
   2035 ; AVX2: vpminsd
   2036 }
   2037 
   2038 define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on unsigned i32 lanes, one <4 x i32> vector per iteration
   2039 vector.ph:
   2040   br label %vector.body
   2041 
   2042 vector.body:                                      ; preds = %vector.body, %vector.ph
   2043   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2044   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2045   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2046   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   2047   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   2048   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   2049   %load.b = load <4 x i32>* %ptr.b, align 2
   2050   %cmp = icmp ult <4 x i32> %load.a, %load.b  ; lane-wise unsigned a < b
   2051   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a < b, otherwise a -> the larger lane
   2052   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2053   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   2054   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2055   br i1 %loop, label %for.end, label %vector.body
   2056 
   2057 for.end:                                          ; preds = %vector.body
   2058   ret void
   2059 ; Directives below, per prefix, match the text "test69:" and then the expected instruction; the SSE2 prefix has none here.
   2060 ; SSE4: test69:
   2061 ; SSE4: pmaxud
   2062 
   2063 ; AVX1: test69:
   2064 ; AVX1: vpmaxud
   2065 
   2066 ; AVX2: test69:
   2067 ; AVX2: vpmaxud
   2068 }
   2069 
   2070 define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on unsigned i32 lanes, one <4 x i32> vector per iteration
   2071 vector.ph:
   2072   br label %vector.body
   2073 
   2074 vector.body:                                      ; preds = %vector.body, %vector.ph
   2075   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2076   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2077   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2078   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   2079   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   2080   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   2081   %load.b = load <4 x i32>* %ptr.b, align 2
   2082   %cmp = icmp ule <4 x i32> %load.a, %load.b  ; lane-wise unsigned a <= b
   2083   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a <= b, otherwise a -> the larger lane
   2084   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2085   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   2086   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2087   br i1 %loop, label %for.end, label %vector.body
   2088 
   2089 for.end:                                          ; preds = %vector.body
   2090   ret void
   2091 ; Directives below, per prefix, match the text "test70:" and then the expected instruction; the SSE2 prefix has none here.
   2092 ; SSE4: test70:
   2093 ; SSE4: pmaxud
   2094 
   2095 ; AVX1: test70:
   2096 ; AVX1: vpmaxud
   2097 
   2098 ; AVX2: test70:
   2099 ; AVX2: vpmaxud
   2100 }
   2101 
   2102 define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on unsigned i32 lanes, one <4 x i32> vector per iteration
   2103 vector.ph:
   2104   br label %vector.body
   2105 
   2106 vector.body:                                      ; preds = %vector.body, %vector.ph
   2107   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2108   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2109   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2110   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   2111   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   2112   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   2113   %load.b = load <4 x i32>* %ptr.b, align 2
   2114   %cmp = icmp ugt <4 x i32> %load.a, %load.b  ; lane-wise unsigned a > b
   2115   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a > b, otherwise a -> the smaller lane
   2116   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2117   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   2118   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2119   br i1 %loop, label %for.end, label %vector.body
   2120 
   2121 for.end:                                          ; preds = %vector.body
   2122   ret void
   2123 ; Directives below, per prefix, match the text "test71:" and then the expected instruction; the SSE2 prefix has none here.
   2124 ; SSE4: test71:
   2125 ; SSE4: pminud
   2126 
   2127 ; AVX1: test71:
   2128 ; AVX1: vpminud
   2129 
   2130 ; AVX2: test71:
   2131 ; AVX2: vpminud
   2132 }
   2133 
   2134 define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on unsigned i32 lanes, one <4 x i32> vector per iteration
   2135 vector.ph:
   2136   br label %vector.body
   2137 
   2138 vector.body:                                      ; preds = %vector.body, %vector.ph
   2139   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2140   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2141   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2142   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   2143   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
   2144   %load.a = load <4 x i32>* %ptr.a, align 2  ; align 2 is less than the vector's 16-byte size
   2145   %load.b = load <4 x i32>* %ptr.b, align 2
   2146   %cmp = icmp uge <4 x i32> %load.a, %load.b  ; lane-wise unsigned a >= b
   2147   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a  ; b where a >= b, otherwise a -> the smaller lane
   2148   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2149   %index.next = add i64 %index, 4  ; advance by the 4 lanes just processed
   2150   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2151   br i1 %loop, label %for.end, label %vector.body
   2152 
   2153 for.end:                                          ; preds = %vector.body
   2154   ret void
   2155 ; Directives below, per prefix, match the text "test72:" and then the expected instruction; the SSE2 prefix has none here.
   2156 ; SSE4: test72:
   2157 ; SSE4: pminud
   2158 
   2159 ; AVX1: test72:
   2160 ; AVX1: vpminud
   2161 
   2162 ; AVX2: test72:
   2163 ; AVX2: vpminud
   2164 }
   2165 
   2166 define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on signed i8 lanes, one <32 x i8> vector per iteration
   2167 vector.ph:
   2168   br label %vector.body
   2169 
   2170 vector.body:                                      ; preds = %vector.body, %vector.ph
   2171   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2172   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2173   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2174   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2175   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2176   %load.a = load <32 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 32-byte size
   2177   %load.b = load <32 x i8>* %ptr.b, align 2
   2178   %cmp = icmp slt <32 x i8> %load.a, %load.b  ; lane-wise signed a < b
   2179   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a  ; b where a < b, otherwise a -> the larger lane
   2180   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2181   %index.next = add i64 %index, 32  ; advance by the 32 lanes just processed
   2182   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2183   br i1 %loop, label %for.end, label %vector.body
   2184 
   2185 for.end:                                          ; preds = %vector.body
   2186   ret void
   2187 ; Only the AVX2 prefix is checked for this test; it matches the text "test73:" and then the expected instruction.
   2188 ; AVX2: test73:
   2189 ; AVX2: vpmaxsb
   2190 }
   2191 
   2192 define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on signed i8 lanes, one <32 x i8> vector per iteration
   2193 vector.ph:
   2194   br label %vector.body
   2195 
   2196 vector.body:                                      ; preds = %vector.body, %vector.ph
   2197   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2198   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2199   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2200   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2201   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2202   %load.a = load <32 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 32-byte size
   2203   %load.b = load <32 x i8>* %ptr.b, align 2
   2204   %cmp = icmp sle <32 x i8> %load.a, %load.b  ; lane-wise signed a <= b
   2205   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a  ; b where a <= b, otherwise a -> the larger lane
   2206   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2207   %index.next = add i64 %index, 32  ; advance by the 32 lanes just processed
   2208   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2209   br i1 %loop, label %for.end, label %vector.body
   2210 
   2211 for.end:                                          ; preds = %vector.body
   2212   ret void
   2213 ; Only the AVX2 prefix is checked for this test; it matches the text "test74:" and then the expected instruction.
   2214 ; AVX2: test74:
   2215 ; AVX2: vpmaxsb
   2216 }
   2217 
   2218 define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on signed i8 lanes, one <32 x i8> vector per iteration
   2219 vector.ph:
   2220   br label %vector.body
   2221 
   2222 vector.body:                                      ; preds = %vector.body, %vector.ph
   2223   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2224   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2225   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2226   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2227   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2228   %load.a = load <32 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 32-byte size
   2229   %load.b = load <32 x i8>* %ptr.b, align 2
   2230   %cmp = icmp sgt <32 x i8> %load.a, %load.b  ; lane-wise signed a > b
   2231   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a  ; b where a > b, otherwise a -> the smaller lane
   2232   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2233   %index.next = add i64 %index, 32  ; advance by the 32 lanes just processed
   2234   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2235   br i1 %loop, label %for.end, label %vector.body
   2236 
   2237 for.end:                                          ; preds = %vector.body
   2238   ret void
   2239 ; Only the AVX2 prefix is checked for this test; it matches the text "test75:" and then the expected instruction.
   2240 ; AVX2: test75:
   2241 ; AVX2: vpminsb
   2242 }
   2243 
   2244 define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = min(a[i], b[i]) on signed i8 lanes, one <32 x i8> vector per iteration
   2245 vector.ph:
   2246   br label %vector.body
   2247 
   2248 vector.body:                                      ; preds = %vector.body, %vector.ph
   2249   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2250   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2251   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2252   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2253   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2254   %load.a = load <32 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 32-byte size
   2255   %load.b = load <32 x i8>* %ptr.b, align 2
   2256   %cmp = icmp sge <32 x i8> %load.a, %load.b  ; lane-wise signed a >= b
   2257   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a  ; b where a >= b, otherwise a -> the smaller lane
   2258   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2259   %index.next = add i64 %index, 32  ; advance by the 32 lanes just processed
   2260   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2261   br i1 %loop, label %for.end, label %vector.body
   2262 
   2263 for.end:                                          ; preds = %vector.body
   2264   ret void
   2265 ; Only the AVX2 prefix is checked for this test; it matches the text "test76:" and then the expected instruction.
   2266 ; AVX2: test76:
   2267 ; AVX2: vpminsb
   2268 }
   2269 
   2270 define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on unsigned i8 lanes, one <32 x i8> vector per iteration
   2271 vector.ph:
   2272   br label %vector.body
   2273 
   2274 vector.body:                                      ; preds = %vector.body, %vector.ph
   2275   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2276   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2277   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2278   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2279   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2280   %load.a = load <32 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 32-byte size
   2281   %load.b = load <32 x i8>* %ptr.b, align 2
   2282   %cmp = icmp ult <32 x i8> %load.a, %load.b  ; lane-wise unsigned a < b
   2283   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a  ; b where a < b, otherwise a -> the larger lane
   2284   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2285   %index.next = add i64 %index, 32  ; advance by the 32 lanes just processed
   2286   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2287   br i1 %loop, label %for.end, label %vector.body
   2288 
   2289 for.end:                                          ; preds = %vector.body
   2290   ret void
   2291 ; Only the AVX2 prefix is checked for this test; it matches the text "test77:" and then the expected instruction.
   2292 ; AVX2: test77:
   2293 ; AVX2: vpmaxub
   2294 }
   2295 
   2296 define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {  ; a[i] = max(a[i], b[i]) on unsigned i8 lanes, one <32 x i8> vector per iteration
   2297 vector.ph:
   2298   br label %vector.body
   2299 
   2300 vector.body:                                      ; preds = %vector.body, %vector.ph
   2301   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]  ; element index: 0 on entry, %index.next on the back edge
   2302   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2303   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2304   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2305   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2306   %load.a = load <32 x i8>* %ptr.a, align 2  ; align 2 is less than the vector's 32-byte size
   2307   %load.b = load <32 x i8>* %ptr.b, align 2
   2308   %cmp = icmp ule <32 x i8> %load.a, %load.b  ; lane-wise unsigned a <= b
   2309   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a  ; b where a <= b, otherwise a -> the larger lane
   2310   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2  ; written back over the vector loaded from %a
   2311   %index.next = add i64 %index, 32  ; advance by the 32 lanes just processed
   2312   %loop = icmp eq i64 %index.next, 16384  ; leave the loop at element 16384
   2313   br i1 %loop, label %for.end, label %vector.body
   2314 
   2315 for.end:                                          ; preds = %vector.body
   2316   ret void
   2317 ; Only the AVX2 prefix is checked for this test; it matches the text "test78:" and then the expected instruction.
   2318 ; AVX2: test78:
   2319 ; AVX2: vpmaxub
   2320 }
   2321 
   2322 define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind { ; a[i] = umin(a[i], b[i]), one <32 x i8> vector per iteration
   2323 vector.ph:
   2324   br label %vector.body
   2325 
   2326 vector.body:                                      ; preds = %vector.body, %vector.ph
   2327   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2328   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2329   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2330   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2331   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2332   %load.a = load <32 x i8>* %ptr.a, align 2
   2333   %load.b = load <32 x i8>* %ptr.b, align 2
   2334   %cmp = icmp ugt <32 x i8> %load.a, %load.b ; lane mask: a >u b
   2335   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; mask set -> take b, else keep a: the smaller lane survives
   2336   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2337   %index.next = add i64 %index, 32 ; step by the 32 lanes just processed
   2338   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2339   br i1 %loop, label %for.end, label %vector.body
   2340 
   2341 for.end:                                          ; preds = %vector.body
   2342   ret void
   2343 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2344 ; AVX2-LABEL: test79:
   2345 ; AVX2: vpminub
   2346 }
   2347 
   2348 define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind { ; a[i] = umin(a[i], b[i]), one <32 x i8> vector per iteration
   2349 vector.ph:
   2350   br label %vector.body
   2351 
   2352 vector.body:                                      ; preds = %vector.body, %vector.ph
   2353   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2354   %gep.a = getelementptr inbounds i8* %a, i64 %index
   2355   %gep.b = getelementptr inbounds i8* %b, i64 %index
   2356   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   2357   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
   2358   %load.a = load <32 x i8>* %ptr.a, align 2
   2359   %load.b = load <32 x i8>* %ptr.b, align 2
   2360   %cmp = icmp uge <32 x i8> %load.a, %load.b ; lane mask: a >=u b
   2361   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; mask set -> take b (ties included), else keep a: the smaller lane survives
   2362   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2363   %index.next = add i64 %index, 32 ; step by the 32 lanes just processed
   2364   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2365   br i1 %loop, label %for.end, label %vector.body
   2366 
   2367 for.end:                                          ; preds = %vector.body
   2368   ret void
   2369 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2370 ; AVX2-LABEL: test80:
   2371 ; AVX2: vpminub
   2372 }
   2373 
   2374 define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = smax(a[i], b[i]), one <16 x i16> vector per iteration
   2375 vector.ph:
   2376   br label %vector.body
   2377 
   2378 vector.body:                                      ; preds = %vector.body, %vector.ph
   2379   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2380   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2381   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2382   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2383   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2384   %load.a = load <16 x i16>* %ptr.a, align 2
   2385   %load.b = load <16 x i16>* %ptr.b, align 2
   2386   %cmp = icmp slt <16 x i16> %load.a, %load.b ; lane mask: a <s b
   2387   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b, else keep a: the larger lane survives
   2388   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2389   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2390   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2391   br i1 %loop, label %for.end, label %vector.body
   2392 
   2393 for.end:                                          ; preds = %vector.body
   2394   ret void
   2395 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2396 ; AVX2-LABEL: test81:
   2397 ; AVX2: vpmaxsw
   2398 }
   2399 
   2400 define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = smax(a[i], b[i]), one <16 x i16> vector per iteration
   2401 vector.ph:
   2402   br label %vector.body
   2403 
   2404 vector.body:                                      ; preds = %vector.body, %vector.ph
   2405   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2406   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2407   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2408   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2409   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2410   %load.a = load <16 x i16>* %ptr.a, align 2
   2411   %load.b = load <16 x i16>* %ptr.b, align 2
   2412   %cmp = icmp sle <16 x i16> %load.a, %load.b ; lane mask: a <=s b
   2413   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b (ties included), else keep a: the larger lane survives
   2414   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2415   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2416   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2417   br i1 %loop, label %for.end, label %vector.body
   2418 
   2419 for.end:                                          ; preds = %vector.body
   2420   ret void
   2421 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2422 ; AVX2-LABEL: test82:
   2423 ; AVX2: vpmaxsw
   2424 }
   2425 
   2426 define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = smin(a[i], b[i]), one <16 x i16> vector per iteration
   2427 vector.ph:
   2428   br label %vector.body
   2429 
   2430 vector.body:                                      ; preds = %vector.body, %vector.ph
   2431   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2432   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2433   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2434   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2435   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2436   %load.a = load <16 x i16>* %ptr.a, align 2
   2437   %load.b = load <16 x i16>* %ptr.b, align 2
   2438   %cmp = icmp sgt <16 x i16> %load.a, %load.b ; lane mask: a >s b
   2439   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b, else keep a: the smaller lane survives
   2440   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2441   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2442   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2443   br i1 %loop, label %for.end, label %vector.body
   2444 
   2445 for.end:                                          ; preds = %vector.body
   2446   ret void
   2447 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2448 ; AVX2-LABEL: test83:
   2449 ; AVX2: vpminsw
   2450 }
   2451 
   2452 define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = smin(a[i], b[i]), one <16 x i16> vector per iteration
   2453 vector.ph:
   2454   br label %vector.body
   2455 
   2456 vector.body:                                      ; preds = %vector.body, %vector.ph
   2457   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2458   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2459   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2460   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2461   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2462   %load.a = load <16 x i16>* %ptr.a, align 2
   2463   %load.b = load <16 x i16>* %ptr.b, align 2
   2464   %cmp = icmp sge <16 x i16> %load.a, %load.b ; lane mask: a >=s b
   2465   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b (ties included), else keep a: the smaller lane survives
   2466   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2467   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2468   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2469   br i1 %loop, label %for.end, label %vector.body
   2470 
   2471 for.end:                                          ; preds = %vector.body
   2472   ret void
   2473 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2474 ; AVX2-LABEL: test84:
   2475 ; AVX2: vpminsw
   2476 }
   2477 
   2478 define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = umax(a[i], b[i]), one <16 x i16> vector per iteration
   2479 vector.ph:
   2480   br label %vector.body
   2481 
   2482 vector.body:                                      ; preds = %vector.body, %vector.ph
   2483   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2484   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2485   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2486   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2487   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2488   %load.a = load <16 x i16>* %ptr.a, align 2
   2489   %load.b = load <16 x i16>* %ptr.b, align 2
   2490   %cmp = icmp ult <16 x i16> %load.a, %load.b ; lane mask: a <u b
   2491   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b, else keep a: the larger lane survives
   2492   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2493   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2494   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2495   br i1 %loop, label %for.end, label %vector.body
   2496 
   2497 for.end:                                          ; preds = %vector.body
   2498   ret void
   2499 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2500 ; AVX2-LABEL: test85:
   2501 ; AVX2: vpmaxuw
   2502 }
   2503 
   2504 define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = umax(a[i], b[i]), one <16 x i16> vector per iteration
   2505 vector.ph:
   2506   br label %vector.body
   2507 
   2508 vector.body:                                      ; preds = %vector.body, %vector.ph
   2509   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2510   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2511   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2512   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2513   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2514   %load.a = load <16 x i16>* %ptr.a, align 2
   2515   %load.b = load <16 x i16>* %ptr.b, align 2
   2516   %cmp = icmp ule <16 x i16> %load.a, %load.b ; lane mask: a <=u b
   2517   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b (ties included), else keep a: the larger lane survives
   2518   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2519   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2520   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2521   br i1 %loop, label %for.end, label %vector.body
   2522 
   2523 for.end:                                          ; preds = %vector.body
   2524   ret void
   2525 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2526 ; AVX2-LABEL: test86:
   2527 ; AVX2: vpmaxuw
   2528 }
   2529 
   2530 define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = umin(a[i], b[i]), one <16 x i16> vector per iteration
   2531 vector.ph:
   2532   br label %vector.body
   2533 
   2534 vector.body:                                      ; preds = %vector.body, %vector.ph
   2535   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2536   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2537   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2538   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2539   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2540   %load.a = load <16 x i16>* %ptr.a, align 2
   2541   %load.b = load <16 x i16>* %ptr.b, align 2
   2542   %cmp = icmp ugt <16 x i16> %load.a, %load.b ; lane mask: a >u b
   2543   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b, else keep a: the smaller lane survives
   2544   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2545   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2546   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2547   br i1 %loop, label %for.end, label %vector.body
   2548 
   2549 for.end:                                          ; preds = %vector.body
   2550   ret void
   2551 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2552 ; AVX2-LABEL: test87:
   2553 ; AVX2: vpminuw
   2554 }
   2555 
   2556 define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind { ; a[i] = umin(a[i], b[i]), one <16 x i16> vector per iteration
   2557 vector.ph:
   2558   br label %vector.body
   2559 
   2560 vector.body:                                      ; preds = %vector.body, %vector.ph
   2561   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2562   %gep.a = getelementptr inbounds i16* %a, i64 %index
   2563   %gep.b = getelementptr inbounds i16* %b, i64 %index
   2564   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   2565   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
   2566   %load.a = load <16 x i16>* %ptr.a, align 2
   2567   %load.b = load <16 x i16>* %ptr.b, align 2
   2568   %cmp = icmp uge <16 x i16> %load.a, %load.b ; lane mask: a >=u b
   2569   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; mask set -> take b (ties included), else keep a: the smaller lane survives
   2570   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2571   %index.next = add i64 %index, 16 ; step by the 16 lanes just processed
   2572   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2573   br i1 %loop, label %for.end, label %vector.body
   2574 
   2575 for.end:                                          ; preds = %vector.body
   2576   ret void
   2577 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2578 ; AVX2-LABEL: test88:
   2579 ; AVX2: vpminuw
   2580 }
   2581 
   2582 define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = smax(a[i], b[i]), one <8 x i32> vector per iteration
   2583 vector.ph:
   2584   br label %vector.body
   2585 
   2586 vector.body:                                      ; preds = %vector.body, %vector.ph
   2587   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2588   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2589   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2590   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2591   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2592   %load.a = load <8 x i32>* %ptr.a, align 2
   2593   %load.b = load <8 x i32>* %ptr.b, align 2
   2594   %cmp = icmp slt <8 x i32> %load.a, %load.b ; lane mask: a <s b
   2595   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b, else keep a: the larger lane survives
   2596   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2597   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2598   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2599   br i1 %loop, label %for.end, label %vector.body
   2600 
   2601 for.end:                                          ; preds = %vector.body
   2602   ret void
   2603 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2604 ; AVX2-LABEL: test89:
   2605 ; AVX2: vpmaxsd
   2606 }
   2607 
   2608 define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = smax(a[i], b[i]), one <8 x i32> vector per iteration
   2609 vector.ph:
   2610   br label %vector.body
   2611 
   2612 vector.body:                                      ; preds = %vector.body, %vector.ph
   2613   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2614   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2615   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2616   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2617   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2618   %load.a = load <8 x i32>* %ptr.a, align 2
   2619   %load.b = load <8 x i32>* %ptr.b, align 2
   2620   %cmp = icmp sle <8 x i32> %load.a, %load.b ; lane mask: a <=s b
   2621   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b (ties included), else keep a: the larger lane survives
   2622   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2623   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2624   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2625   br i1 %loop, label %for.end, label %vector.body
   2626 
   2627 for.end:                                          ; preds = %vector.body
   2628   ret void
   2629 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2630 ; AVX2-LABEL: test90:
   2631 ; AVX2: vpmaxsd
   2632 }
   2633 
   2634 define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = smin(a[i], b[i]), one <8 x i32> vector per iteration
   2635 vector.ph:
   2636   br label %vector.body
   2637 
   2638 vector.body:                                      ; preds = %vector.body, %vector.ph
   2639   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2640   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2641   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2642   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2643   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2644   %load.a = load <8 x i32>* %ptr.a, align 2
   2645   %load.b = load <8 x i32>* %ptr.b, align 2
   2646   %cmp = icmp sgt <8 x i32> %load.a, %load.b ; lane mask: a >s b
   2647   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b, else keep a: the smaller lane survives
   2648   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2649   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2650   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2651   br i1 %loop, label %for.end, label %vector.body
   2652 
   2653 for.end:                                          ; preds = %vector.body
   2654   ret void
   2655 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2656 ; AVX2-LABEL: test91:
   2657 ; AVX2: vpminsd
   2658 }
   2659 
   2660 define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = smin(a[i], b[i]), one <8 x i32> vector per iteration
   2661 vector.ph:
   2662   br label %vector.body
   2663 
   2664 vector.body:                                      ; preds = %vector.body, %vector.ph
   2665   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2666   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2667   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2668   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2669   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2670   %load.a = load <8 x i32>* %ptr.a, align 2
   2671   %load.b = load <8 x i32>* %ptr.b, align 2
   2672   %cmp = icmp sge <8 x i32> %load.a, %load.b ; lane mask: a >=s b
   2673   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b (ties included), else keep a: the smaller lane survives
   2674   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2675   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2676   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2677   br i1 %loop, label %for.end, label %vector.body
   2678 
   2679 for.end:                                          ; preds = %vector.body
   2680   ret void
   2681 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2682 ; AVX2-LABEL: test92:
   2683 ; AVX2: vpminsd
   2684 }
   2685 
   2686 define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = umax(a[i], b[i]), one <8 x i32> vector per iteration
   2687 vector.ph:
   2688   br label %vector.body
   2689 
   2690 vector.body:                                      ; preds = %vector.body, %vector.ph
   2691   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2692   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2693   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2694   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2695   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2696   %load.a = load <8 x i32>* %ptr.a, align 2
   2697   %load.b = load <8 x i32>* %ptr.b, align 2
   2698   %cmp = icmp ult <8 x i32> %load.a, %load.b ; lane mask: a <u b
   2699   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b, else keep a: the larger lane survives
   2700   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2701   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2702   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2703   br i1 %loop, label %for.end, label %vector.body
   2704 
   2705 for.end:                                          ; preds = %vector.body
   2706   ret void
   2707 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2708 ; AVX2-LABEL: test93:
   2709 ; AVX2: vpmaxud
   2710 }
   2711 
   2712 define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = umax(a[i], b[i]), one <8 x i32> vector per iteration
   2713 vector.ph:
   2714   br label %vector.body
   2715 
   2716 vector.body:                                      ; preds = %vector.body, %vector.ph
   2717   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2718   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2719   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2720   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2721   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2722   %load.a = load <8 x i32>* %ptr.a, align 2
   2723   %load.b = load <8 x i32>* %ptr.b, align 2
   2724   %cmp = icmp ule <8 x i32> %load.a, %load.b ; lane mask: a <=u b
   2725   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b (ties included), else keep a: the larger lane survives
   2726   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2727   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2728   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2729   br i1 %loop, label %for.end, label %vector.body
   2730 
   2731 for.end:                                          ; preds = %vector.body
   2732   ret void
   2733 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2734 ; AVX2-LABEL: test94:
   2735 ; AVX2: vpmaxud
   2736 }
   2737 
   2738 define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = umin(a[i], b[i]), one <8 x i32> vector per iteration
   2739 vector.ph:
   2740   br label %vector.body
   2741 
   2742 vector.body:                                      ; preds = %vector.body, %vector.ph
   2743   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2744   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2745   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2746   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2747   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2748   %load.a = load <8 x i32>* %ptr.a, align 2
   2749   %load.b = load <8 x i32>* %ptr.b, align 2
   2750   %cmp = icmp ugt <8 x i32> %load.a, %load.b ; lane mask: a >u b
   2751   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b, else keep a: the smaller lane survives
   2752   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2753   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2754   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2755   br i1 %loop, label %for.end, label %vector.body
   2756 
   2757 for.end:                                          ; preds = %vector.body
   2758   ret void
   2759 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2760 ; AVX2-LABEL: test95:
   2761 ; AVX2: vpminud
   2762 }
   2763 
   2764 define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind { ; a[i] = umin(a[i], b[i]), one <8 x i32> vector per iteration
   2765 vector.ph:
   2766   br label %vector.body
   2767 
   2768 vector.body:                                      ; preds = %vector.body, %vector.ph
   2769   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   2770   %gep.a = getelementptr inbounds i32* %a, i64 %index
   2771   %gep.b = getelementptr inbounds i32* %b, i64 %index
   2772   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   2773   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
   2774   %load.a = load <8 x i32>* %ptr.a, align 2
   2775   %load.b = load <8 x i32>* %ptr.b, align 2
   2776   %cmp = icmp uge <8 x i32> %load.a, %load.b ; lane mask: a >=u b
   2777   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a ; mask set -> take b (ties included), else keep a: the smaller lane survives
   2778   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 ; write the result back over the chunk read from %a
   2779   %index.next = add i64 %index, 8 ; step by the 8 lanes just processed
   2780   %loop = icmp eq i64 %index.next, 16384 ; leave the loop when the element index hits 16384
   2781   br i1 %loop, label %for.end, label %vector.body
   2782 
   2783 for.end:                                          ; preds = %vector.body
   2784   ret void
   2785 ; The -LABEL directive anchors the instruction check below to this function's own output.
   2786 ; AVX2-LABEL: test96:
   2787 ; AVX2: vpminud
   2788 }
   2789