Home | History | Annotate | Download | only in LoopVectorize
      1 ; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      4 
      5 @A = common global [1024 x i32] zeroinitializer, align 16
      6 @fA = common global [1024 x float] zeroinitializer, align 16
      7 @dA = common global [1024 x double] zeroinitializer, align 16
      8 
      9 ; Signed tests.
     10 
     11 ; Turn this into a max reduction. Make sure we use a splat to initialize the
     12 ; vector for the reduction.
     13 ; CHECK-LABEL: @max_red(
     14 ; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
     15 ; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
     16 ; CHECK: icmp sgt <2 x i32>
     17 ; CHECK: select <2 x i1>
     18 ; CHECK: middle.block
     19 ; CHECK: icmp sgt <2 x i32>
     20 ; CHECK: select i1
     21 
     22 define i32 @max_red(i32 %max) {
     23 entry:
     24   br label %for.body
     25 
     26 for.body:
     27   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     28   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
     29   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
     30   %0 = load i32, i32* %arrayidx, align 4
     31   %cmp3 = icmp sgt i32 %0, %max.red.08
     32   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
     33   %indvars.iv.next = add i64 %indvars.iv, 1
     34   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     35   %exitcond = icmp eq i32 %lftr.wideiv, 1024
     36   br i1 %exitcond, label %for.end, label %for.body
     37 
     38 for.end:
     39   ret i32 %max.red.0
     40 }
     41 
     42 ; Turn this into a max reduction. The compare has its operands swapped and its
     43 ; predicate inverted (slt instead of sgt), so this is still a max reduction.
     44 ; CHECK-LABEL: @max_red_inverse_select(
     45 ; CHECK: icmp slt <2 x i32>
     46 ; CHECK: select <2 x i1>
     47 ; CHECK: middle.block
     48 ; CHECK: icmp sgt <2 x i32>
     49 ; CHECK: select i1
     50 
     51 define i32 @max_red_inverse_select(i32 %max) {
     52 entry:
     53   br label %for.body
     54 
     55 for.body:
     56   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     57   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
     58   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
     59   %0 = load i32, i32* %arrayidx, align 4
     60   %cmp3 = icmp slt i32 %max.red.08, %0
     61   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
     62   %indvars.iv.next = add i64 %indvars.iv, 1
     63   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     64   %exitcond = icmp eq i32 %lftr.wideiv, 1024
     65   br i1 %exitcond, label %for.end, label %for.body
     66 
     67 for.end:
     68   ret i32 %max.red.0
     69 }
     70 
     71 ; Turn this into a min reduction.
     72 ; CHECK-LABEL: @min_red(
     73 ; CHECK: icmp slt <2 x i32>
     74 ; CHECK: select <2 x i1>
     75 ; CHECK: middle.block
     76 ; CHECK: icmp slt <2 x i32>
     77 ; CHECK: select i1
     78 
     79 define i32 @min_red(i32 %max) {
     80 entry:
     81   br label %for.body
     82 
     83 for.body:
     84   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     85   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
     86   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
     87   %0 = load i32, i32* %arrayidx, align 4
     88   %cmp3 = icmp slt i32 %0, %max.red.08
     89   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
     90   %indvars.iv.next = add i64 %indvars.iv, 1
     91   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     92   %exitcond = icmp eq i32 %lftr.wideiv, 1024
     93   br i1 %exitcond, label %for.end, label %for.body
     94 
     95 for.end:
     96   ret i32 %max.red.0
     97 }
     98 
     99 ; Turn this into a min reduction. The compare has its operands swapped and its
    100 ; predicate inverted (sgt instead of slt), so this is still a min reduction.
    101 ; CHECK-LABEL: @min_red_inverse_select(
    102 ; CHECK: icmp sgt <2 x i32>
    103 ; CHECK: select <2 x i1>
    104 ; CHECK: middle.block
    105 ; CHECK: icmp slt <2 x i32>
    106 ; CHECK: select i1
    107 
    108 define i32 @min_red_inverse_select(i32 %max) {
    109 entry:
    110   br label %for.body
    111 
    112 for.body:
    113   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    114   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    115   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    116   %0 = load i32, i32* %arrayidx, align 4
    117   %cmp3 = icmp sgt i32 %max.red.08, %0
    118   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
    119   %indvars.iv.next = add i64 %indvars.iv, 1
    120   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    121   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    122   br i1 %exitcond, label %for.end, label %for.body
    123 
    124 for.end:
    125   ret i32 %max.red.0
    126 }
    127 
    128 ; Unsigned tests.
    129 
    130 ; Turn this into a max reduction.
    131 ; CHECK-LABEL: @umax_red(
    132 ; CHECK: icmp ugt <2 x i32>
    133 ; CHECK: select <2 x i1>
    134 ; CHECK: middle.block
    135 ; CHECK: icmp ugt <2 x i32>
    136 ; CHECK: select i1
    137 
    138 define i32 @umax_red(i32 %max) {
    139 entry:
    140   br label %for.body
    141 
    142 for.body:
    143   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    144   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    145   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    146   %0 = load i32, i32* %arrayidx, align 4
    147   %cmp3 = icmp ugt i32 %0, %max.red.08
    148   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
    149   %indvars.iv.next = add i64 %indvars.iv, 1
    150   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    151   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    152   br i1 %exitcond, label %for.end, label %for.body
    153 
    154 for.end:
    155   ret i32 %max.red.0
    156 }
    157 
    158 ; Turn this into a max reduction. The compare has its operands swapped and its
    159 ; predicate inverted (ult instead of ugt), so this is still a max reduction.
    160 ; CHECK-LABEL: @umax_red_inverse_select(
    161 ; CHECK: icmp ult <2 x i32>
    162 ; CHECK: select <2 x i1>
    163 ; CHECK: middle.block
    164 ; CHECK: icmp ugt <2 x i32>
    165 ; CHECK: select i1
    166 
    167 define i32 @umax_red_inverse_select(i32 %max) {
    168 entry:
    169   br label %for.body
    170 
    171 for.body:
    172   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    173   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    174   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    175   %0 = load i32, i32* %arrayidx, align 4
    176   %cmp3 = icmp ult i32 %max.red.08, %0
    177   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
    178   %indvars.iv.next = add i64 %indvars.iv, 1
    179   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    180   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    181   br i1 %exitcond, label %for.end, label %for.body
    182 
    183 for.end:
    184   ret i32 %max.red.0
    185 }
    186 
    187 ; Turn this into a min reduction.
    188 ; CHECK-LABEL: @umin_red(
    189 ; CHECK: icmp ult <2 x i32>
    190 ; CHECK: select <2 x i1>
    191 ; CHECK: middle.block
    192 ; CHECK: icmp ult <2 x i32>
    193 ; CHECK: select i1
    194 
    195 define i32 @umin_red(i32 %max) {
    196 entry:
    197   br label %for.body
    198 
    199 for.body:
    200   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    201   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    202   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    203   %0 = load i32, i32* %arrayidx, align 4
    204   %cmp3 = icmp ult i32 %0, %max.red.08
    205   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
    206   %indvars.iv.next = add i64 %indvars.iv, 1
    207   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    208   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    209   br i1 %exitcond, label %for.end, label %for.body
    210 
    211 for.end:
    212   ret i32 %max.red.0
    213 }
    214 
    215 ; Turn this into a min reduction. The compare has its operands swapped and its
    216 ; predicate inverted (ugt instead of ult), so this is still a min reduction.
    217 ; CHECK-LABEL: @umin_red_inverse_select(
    218 ; CHECK: icmp ugt <2 x i32>
    219 ; CHECK: select <2 x i1>
    220 ; CHECK: middle.block
    221 ; CHECK: icmp ult <2 x i32>
    222 ; CHECK: select i1
    223 
    224 define i32 @umin_red_inverse_select(i32 %max) {
    225 entry:
    226   br label %for.body
    227 
    228 for.body:
    229   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    230   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    231   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    232   %0 = load i32, i32* %arrayidx, align 4
    233   %cmp3 = icmp ugt i32 %max.red.08, %0
    234   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
    235   %indvars.iv.next = add i64 %indvars.iv, 1
    236   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    237   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    238   br i1 %exitcond, label %for.end, label %for.body
    239 
    240 for.end:
    241   ret i32 %max.red.0
    242 }
    243 
    244 ; SGE -> SLT
    245 ; Turn this into a min reduction (select inputs are reversed).
    246 ; CHECK-LABEL: @sge_min_red(
    247 ; CHECK: icmp sge <2 x i32>
    248 ; CHECK: select <2 x i1>
    249 ; CHECK: middle.block
    250 ; CHECK: icmp slt <2 x i32>
    251 ; CHECK: select i1
    252 
    253 define i32 @sge_min_red(i32 %max) {
    254 entry:
    255   br label %for.body
    256 
    257 for.body:
    258   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    259   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    260   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    261   %0 = load i32, i32* %arrayidx, align 4
    262   %cmp3 = icmp sge i32 %0, %max.red.08
    263   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
    264   %indvars.iv.next = add i64 %indvars.iv, 1
    265   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    266   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    267   br i1 %exitcond, label %for.end, label %for.body
    268 
    269 for.end:
    270   ret i32 %max.red.0
    271 }
    272 
    273 ; SLE -> SGT
    274 ; Turn this into a max reduction (select inputs are reversed). Despite the "min" in the function name, the pattern keeps the larger value.
    275 ; CHECK-LABEL: @sle_min_red(
    276 ; CHECK: icmp sle <2 x i32>
    277 ; CHECK: select <2 x i1>
    278 ; CHECK: middle.block
    279 ; CHECK: icmp sgt <2 x i32>
    280 ; CHECK: select i1
    281 
    282 define i32 @sle_min_red(i32 %max) {
    283 entry:
    284   br label %for.body
    285 
    286 for.body:
    287   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    288   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    289   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    290   %0 = load i32, i32* %arrayidx, align 4
    291   %cmp3 = icmp sle i32 %0, %max.red.08
    292   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
    293   %indvars.iv.next = add i64 %indvars.iv, 1
    294   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    295   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    296   br i1 %exitcond, label %for.end, label %for.body
    297 
    298 for.end:
    299   ret i32 %max.red.0
    300 }
    301 
    302 ; UGE -> ULT
    303 ; Turn this into a min reduction (select inputs are reversed).
    304 ; CHECK-LABEL: @uge_min_red(
    305 ; CHECK: icmp uge <2 x i32>
    306 ; CHECK: select <2 x i1>
    307 ; CHECK: middle.block
    308 ; CHECK: icmp ult <2 x i32>
    309 ; CHECK: select i1
    310 
    311 define i32 @uge_min_red(i32 %max) {
    312 entry:
    313   br label %for.body
    314 
    315 for.body:
    316   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    317   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    318   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    319   %0 = load i32, i32* %arrayidx, align 4
    320   %cmp3 = icmp uge i32 %0, %max.red.08
    321   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
    322   %indvars.iv.next = add i64 %indvars.iv, 1
    323   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    324   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    325   br i1 %exitcond, label %for.end, label %for.body
    326 
    327 for.end:
    328   ret i32 %max.red.0
    329 }
    330 
    331 ; ULE -> UGT
    332 ; Turn this into a max reduction (select inputs are reversed). Despite the "min" in the function name, the pattern keeps the larger value.
    333 ; CHECK-LABEL: @ule_min_red(
    334 ; CHECK: icmp ule <2 x i32>
    335 ; CHECK: select <2 x i1>
    336 ; CHECK: middle.block
    337 ; CHECK: icmp ugt <2 x i32>
    338 ; CHECK: select i1
    339 
    340 define i32 @ule_min_red(i32 %max) {
    341 entry:
    342   br label %for.body
    343 
    344 for.body:
    345   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    346   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    347   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    348   %0 = load i32, i32* %arrayidx, align 4
    349   %cmp3 = icmp ule i32 %0, %max.red.08
    350   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
    351   %indvars.iv.next = add i64 %indvars.iv, 1
    352   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    353   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    354   br i1 %exitcond, label %for.end, label %for.body
    355 
    356 for.end:
    357   ret i32 %max.red.0
    358 }
    359 
    360 ; No reduction: the compare below is between two loaded values and never reads the running value, so no vector compare may be emitted. The negative pattern spells out the icmp predicate with a regex because printed IR always has one between the opcode and the type.
    361 ; CHECK-LABEL: @no_red_1(
    362 ; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
    363 define i32 @no_red_1(i32 %max) {
    364 entry:
    365   br label %for.body
    366 
    367 for.body:
    368   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    369   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    370   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    371   %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
    372   %0 = load i32, i32* %arrayidx, align 4
    373   %1 = load i32, i32* %arrayidx1, align 4
    374   %cmp3 = icmp sgt i32 %0, %1 ; compares the two loads; %max.red.08 is not an operand
    375   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
    376   %indvars.iv.next = add i64 %indvars.iv, 1
    377   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    378   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    379   br i1 %exitcond, label %for.end, label %for.body
    380 
    381 for.end:
    382   ret i32 %max.red.0
    383 }
    384 
    385 ; CHECK-LABEL: @no_red_2(
    386 ; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
    387 define i32 @no_red_2(i32 %max) { ; not a min/max pattern: expect no vector compare (the negative pattern above matches the icmp predicate with a regex so that it can actually fire)
    388 entry:
    389   br label %for.body
    390 
    391 for.body:
    392   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    393   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
    394   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
    395   %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
    396   %0 = load i32, i32* %arrayidx, align 4
    397   %1 = load i32, i32* %arrayidx1, align 4
    398   %cmp3 = icmp sgt i32 %0, %max.red.08
    399   %max.red.0 = select i1 %cmp3, i32 %0, i32 %1 ; selects between the two loads; the running value %max.red.08 is never selected
    400   %indvars.iv.next = add i64 %indvars.iv, 1
    401   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    402   %exitcond = icmp eq i32 %lftr.wideiv, 1024
    403   br i1 %exitcond, label %for.end, label %for.body
    404 
    405 for.end:
    406   ret i32 %max.red.0
    407 }
    408 
    409 ; Float tests.
    410 
    411 ; Maximum.
    412 
    413 ; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
    414 ; CHECK-LABEL: @max_red_float(
    415 ; CHECK: fcmp fast ogt <2 x float>
    416 ; CHECK: select <2 x i1>
    417 ; CHECK: middle.block
    418 ; CHECK: fcmp fast ogt <2 x float>
    419 ; CHECK: select i1
    420 
    421 define float @max_red_float(float %max) #0 {
    422 entry:
    423   br label %for.body
    424 
    425 for.body:
    426   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    427   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    428   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    429   %0 = load float, float* %arrayidx, align 4
    430   %cmp3 = fcmp fast ogt float %0, %max.red.08
    431   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
    432   %indvars.iv.next = add i64 %indvars.iv, 1
    433   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    434   br i1 %exitcond, label %for.end, label %for.body
    435 
    436 for.end:
    437   ret float %max.red.0
    438 }
    439 
    440 ; CHECK-LABEL: @max_red_float_ge(
    441 ; CHECK: fcmp fast oge <2 x float>
    442 ; CHECK: select <2 x i1>
    443 ; CHECK: middle.block
    444 ; CHECK: fcmp fast ogt <2 x float>
    445 ; CHECK: select i1
    446 
    447 define float @max_red_float_ge(float %max) #0 {
    448 entry:
    449   br label %for.body
    450 
    451 for.body:
    452   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    453   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    454   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    455   %0 = load float, float* %arrayidx, align 4
    456   %cmp3 = fcmp fast oge float %0, %max.red.08
    457   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
    458   %indvars.iv.next = add i64 %indvars.iv, 1
    459   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    460   br i1 %exitcond, label %for.end, label %for.body
    461 
    462 for.end:
    463   ret float %max.red.0
    464 }
    465 
    466 ; CHECK-LABEL: @inverted_max_red_float(
    467 ; CHECK: fcmp fast olt <2 x float>
    468 ; CHECK: select <2 x i1>
    469 ; CHECK: middle.block
    470 ; CHECK: fcmp fast ogt <2 x float>
    471 ; CHECK: select i1
    472 
    473 define float @inverted_max_red_float(float %max) #0 {
    474 entry:
    475   br label %for.body
    476 
    477 for.body:
    478   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    479   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    480   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    481   %0 = load float, float* %arrayidx, align 4
    482   %cmp3 = fcmp fast olt float %0, %max.red.08
    483   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
    484   %indvars.iv.next = add i64 %indvars.iv, 1
    485   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    486   br i1 %exitcond, label %for.end, label %for.body
    487 
    488 for.end:
    489   ret float %max.red.0
    490 }
    491 
    492 ; CHECK-LABEL: @inverted_max_red_float_le(
    493 ; CHECK: fcmp fast ole <2 x float>
    494 ; CHECK: select <2 x i1>
    495 ; CHECK: middle.block
    496 ; CHECK: fcmp fast ogt <2 x float>
    497 ; CHECK: select i1
    498 
    499 define float @inverted_max_red_float_le(float %max) #0 {
    500 entry:
    501   br label %for.body
    502 
    503 for.body:
    504   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    505   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    506   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    507   %0 = load float, float* %arrayidx, align 4
    508   %cmp3 = fcmp fast ole float %0, %max.red.08
    509   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
    510   %indvars.iv.next = add i64 %indvars.iv, 1
    511   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    512   br i1 %exitcond, label %for.end, label %for.body
    513 
    514 for.end:
    515   ret float %max.red.0
    516 }
    517 
    518 ; CHECK-LABEL: @unordered_max_red_float(
    519 ; CHECK: fcmp fast ole <2 x float>
    520 ; CHECK: select <2 x i1>
    521 ; CHECK: middle.block
    522 ; CHECK: fcmp fast ogt <2 x float>
    523 ; CHECK: select i1
    524 
    525 define float @unordered_max_red_float(float %max) #0 {
    526 entry:
    527   br label %for.body
    528 
    529 for.body:
    530   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    531   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    532   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    533   %0 = load float, float* %arrayidx, align 4
    534   %cmp3 = fcmp fast ugt float %0, %max.red.08
    535   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
    536   %indvars.iv.next = add i64 %indvars.iv, 1
    537   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    538   br i1 %exitcond, label %for.end, label %for.body
    539 
    540 for.end:
    541   ret float %max.red.0
    542 }
    543 
    544 ; CHECK-LABEL: @unordered_max_red_float_ge(
    545 ; CHECK: fcmp fast olt <2 x float>
    546 ; CHECK: select <2 x i1>
    547 ; CHECK: middle.block
    548 ; CHECK: fcmp fast ogt <2 x float>
    549 ; CHECK: select i1
    550 
    551 define float @unordered_max_red_float_ge(float %max) #0 {
    552 entry:
    553   br label %for.body
    554 
    555 for.body:
    556   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    557   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    558   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    559   %0 = load float, float* %arrayidx, align 4
    560   %cmp3 = fcmp fast uge float %0, %max.red.08
    561   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
    562   %indvars.iv.next = add i64 %indvars.iv, 1
    563   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    564   br i1 %exitcond, label %for.end, label %for.body
    565 
    566 for.end:
    567   ret float %max.red.0
    568 }
    569 
    570 ; CHECK-LABEL: @inverted_unordered_max_red_float(
    571 ; CHECK: fcmp fast oge <2 x float>
    572 ; CHECK: select <2 x i1>
    573 ; CHECK: middle.block
    574 ; CHECK: fcmp fast ogt <2 x float>
    575 ; CHECK: select i1
    576 
    577 define float @inverted_unordered_max_red_float(float %max) #0 {
    578 entry:
    579   br label %for.body
    580 
    581 for.body:
    582   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    583   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    584   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    585   %0 = load float, float* %arrayidx, align 4
    586   %cmp3 = fcmp fast ult float %0, %max.red.08
    587   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
    588   %indvars.iv.next = add i64 %indvars.iv, 1
    589   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    590   br i1 %exitcond, label %for.end, label %for.body
    591 
    592 for.end:
    593   ret float %max.red.0
    594 }
    595 
    596 ; CHECK-LABEL: @inverted_unordered_max_red_float_le(
    597 ; CHECK: fcmp fast ogt <2 x float>
    598 ; CHECK: select <2 x i1>
    599 ; CHECK: middle.block
    600 ; CHECK: fcmp fast ogt <2 x float>
    601 ; CHECK: select i1
    602 
    603 define float @inverted_unordered_max_red_float_le(float %max) #0 {
    604 entry:
    605   br label %for.body
    606 
    607 for.body:
    608   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    609   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    610   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    611   %0 = load float, float* %arrayidx, align 4
    612   %cmp3 = fcmp fast ule float %0, %max.red.08
    613   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
    614   %indvars.iv.next = add i64 %indvars.iv, 1
    615   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    616   br i1 %exitcond, label %for.end, label %for.body
    617 
    618 for.end:
    619   ret float %max.red.0
    620 }
    621 
    622 ; Minimum.
    623 
    624 ; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
    625 ; CHECK-LABEL: @min_red_float(
    626 ; CHECK: fcmp fast olt <2 x float>
    627 ; CHECK: select <2 x i1>
    628 ; CHECK: middle.block
    629 ; CHECK: fcmp fast olt <2 x float>
    630 ; CHECK: select i1
    631 
    632 define float @min_red_float(float %min) #0 {
    633 entry:
    634   br label %for.body
    635 
    636 for.body:
    637   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    638   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    639   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    640   %0 = load float, float* %arrayidx, align 4
    641   %cmp3 = fcmp fast olt float %0, %min.red.08
    642   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
    643   %indvars.iv.next = add i64 %indvars.iv, 1
    644   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    645   br i1 %exitcond, label %for.end, label %for.body
    646 
    647 for.end:
    648   ret float %min.red.0
    649 }
    650 
    651 ; CHECK-LABEL: @min_red_float_le(
    652 ; CHECK: fcmp fast ole <2 x float>
    653 ; CHECK: select <2 x i1>
    654 ; CHECK: middle.block
    655 ; CHECK: fcmp fast olt <2 x float>
    656 ; CHECK: select i1
    657 
    658 define float @min_red_float_le(float %min) #0 {
    659 entry:
    660   br label %for.body
    661 
    662 for.body:
    663   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    664   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    665   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    666   %0 = load float, float* %arrayidx, align 4
    667   %cmp3 = fcmp fast ole float %0, %min.red.08
    668   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
    669   %indvars.iv.next = add i64 %indvars.iv, 1
    670   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    671   br i1 %exitcond, label %for.end, label %for.body
    672 
    673 for.end:
    674   ret float %min.red.0
    675 }
    676 
    677 ; CHECK-LABEL: @inverted_min_red_float(
    678 ; CHECK: fcmp fast ogt <2 x float>
    679 ; CHECK: select <2 x i1>
    680 ; CHECK: middle.block
    681 ; CHECK: fcmp fast olt <2 x float>
    682 ; CHECK: select i1
    683 
    684 define float @inverted_min_red_float(float %min) #0 {
    685 entry:
    686   br label %for.body
    687 
    688 for.body:
    689   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    690   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    691   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    692   %0 = load float, float* %arrayidx, align 4
    693   %cmp3 = fcmp fast ogt float %0, %min.red.08
    694   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
    695   %indvars.iv.next = add i64 %indvars.iv, 1
    696   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    697   br i1 %exitcond, label %for.end, label %for.body
    698 
    699 for.end:
    700   ret float %min.red.0
    701 }
    702 
    703 ; CHECK-LABEL: @inverted_min_red_float_ge(
    704 ; CHECK: fcmp fast oge <2 x float>
    705 ; CHECK: select <2 x i1>
    706 ; CHECK: middle.block
    707 ; CHECK: fcmp fast olt <2 x float>
    708 ; CHECK: select i1
    709 
    710 define float @inverted_min_red_float_ge(float %min) #0 {
    711 entry:
    712   br label %for.body
    713 
    714 for.body:
    715   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    716   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    717   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    718   %0 = load float, float* %arrayidx, align 4
    719   %cmp3 = fcmp fast oge float %0, %min.red.08
    720   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
    721   %indvars.iv.next = add i64 %indvars.iv, 1
    722   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    723   br i1 %exitcond, label %for.end, label %for.body
    724 
    725 for.end:
    726   ret float %min.red.0
    727 }
    728 
    729 ; CHECK-LABEL: @unordered_min_red_float(
    730 ; CHECK: fcmp fast oge <2 x float>
    731 ; CHECK: select <2 x i1>
    732 ; CHECK: middle.block
    733 ; CHECK: fcmp fast olt <2 x float>
    734 ; CHECK: select i1
    735 
    736 define float @unordered_min_red_float(float %min) #0 {
    737 entry:
    738   br label %for.body
    739 
    740 for.body:
    741   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    742   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    743   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    744   %0 = load float, float* %arrayidx, align 4
    745   %cmp3 = fcmp fast ult float %0, %min.red.08
    746   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
    747   %indvars.iv.next = add i64 %indvars.iv, 1
    748   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    749   br i1 %exitcond, label %for.end, label %for.body
    750 
    751 for.end:
    752   ret float %min.red.0
    753 }
    754 
    755 ; CHECK-LABEL: @unordered_min_red_float_le(
    756 ; CHECK: fcmp fast ogt <2 x float>
    757 ; CHECK: select <2 x i1>
    758 ; CHECK: middle.block
    759 ; CHECK: fcmp fast olt <2 x float>
    760 ; CHECK: select i1
    761 
    762 define float @unordered_min_red_float_le(float %min) #0 {
    763 entry:
    764   br label %for.body
    765 
    766 for.body:
    767   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    768   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    769   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    770   %0 = load float, float* %arrayidx, align 4
    771   %cmp3 = fcmp fast ule float %0, %min.red.08
    772   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
    773   %indvars.iv.next = add i64 %indvars.iv, 1
    774   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    775   br i1 %exitcond, label %for.end, label %for.body
    776 
    777 for.end:
    778   ret float %min.red.0
    779 }
    780 
    781 ; CHECK-LABEL: @inverted_unordered_min_red_float(
    782 ; CHECK: fcmp fast ole <2 x float>
    783 ; CHECK: select <2 x i1>
    784 ; CHECK: middle.block
    785 ; CHECK: fcmp fast olt <2 x float>
    786 ; CHECK: select i1
    787 
    788 define float @inverted_unordered_min_red_float(float %min) #0 {
    789 entry:
    790   br label %for.body
    791 
    792 for.body:
    793   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    794   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    795   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    796   %0 = load float, float* %arrayidx, align 4
    797   %cmp3 = fcmp fast ugt float %0, %min.red.08
    798   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
    799   %indvars.iv.next = add i64 %indvars.iv, 1
    800   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    801   br i1 %exitcond, label %for.end, label %for.body
    802 
    803 for.end:
    804   ret float %min.red.0
    805 }
    806 
    807 ; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
    808 ; CHECK: fcmp fast olt <2 x float>
    809 ; CHECK: select <2 x i1>
    810 ; CHECK: middle.block
    811 ; CHECK: fcmp fast olt <2 x float>
    812 ; CHECK: select i1
    813 
    814 define float @inverted_unordered_min_red_float_ge(float %min) #0 {
    815 entry:
    816   br label %for.body
    817 
    818 for.body:
    819   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    820   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
    821   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    822   %0 = load float, float* %arrayidx, align 4
    823   %cmp3 = fcmp fast uge float %0, %min.red.08
    824   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
    825   %indvars.iv.next = add i64 %indvars.iv, 1
    826   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    827   br i1 %exitcond, label %for.end, label %for.body
    828 
    829 for.end:
    830   ret float %min.red.0
    831 }
    832 
    833 ; Make sure we handle doubles, too.
    834 ; CHECK-LABEL: @min_red_double(
    835 ; CHECK: fcmp fast olt <2 x double>
    836 ; CHECK: select <2 x i1>
    837 ; CHECK: middle.block
    838 ; CHECK: fcmp fast olt <2 x double>
    839 ; CHECK: select i1
    840 
    841 define double @min_red_double(double %min) #0 {
    842 entry:
    843   br label %for.body
    844 
    845 for.body:
    846   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    847   %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
    848   %arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
    849   %0 = load double, double* %arrayidx, align 4
    850   %cmp3 = fcmp fast olt double %0, %min.red.08
    851   %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
    852   %indvars.iv.next = add i64 %indvars.iv, 1
    853   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    854   br i1 %exitcond, label %for.end, label %for.body
    855 
    856 for.end:
    857   ret double %min.red.0
    858 }
    859 
    860 
    861 ; Don't turn this into a max reduction. The no-nans-fp-math attribute is missing.
    862 ; CHECK-LABEL: @max_red_float_nans(
    863 ; CHECK-NOT: <2 x float>
    864 
    865 define float @max_red_float_nans(float %max) {
    866 entry:
    867   br label %for.body
    868 
    869 for.body:
    870   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    871   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
    872   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
    873   %0 = load float, float* %arrayidx, align 4
    874   %cmp3 = fcmp fast ogt float %0, %max.red.08
    875   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
    876   %indvars.iv.next = add i64 %indvars.iv, 1
    877   %exitcond = icmp eq i64 %indvars.iv.next, 1024
    878   br i1 %exitcond, label %for.end, label %for.body
    879 
    880 for.end:
    881   ret float %max.red.0
    882 }
    883 
    884 
    885 attributes #0 = { "no-nans-fp-math"="true" }
    886