; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

@A = common global [1024 x i32] zeroinitializer, align 16
@fA = common global [1024 x float] zeroinitializer, align 16
@dA = common global [1024 x double] zeroinitializer, align 16

; Signed tests.

; Signed max reduction over @A, seeded with %max. The seed is expected to be
; splatted (insertelement + shufflevector) to initialize the vector used for
; the reduction.
; CHECK-LABEL: @max_red(
; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
; CHECK: select i1

define i32 @max_red(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp sgt i32 %elt, %acc
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; Still a max reduction: the compare operands are swapped and the predicate is
; flipped (slt instead of sgt), while the select picks the same values.
; CHECK-LABEL: @max_red_inverse_select(
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
; CHECK: select i1

define i32 @max_red_inverse_select(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp slt i32 %acc, %elt
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; Signed min reduction over @A, seeded with the function argument.
; CHECK-LABEL: @min_red(
; CHECK: icmp slt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
; CHECK: select i1

define i32 @min_red(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp slt i32 %elt, %acc
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; Still a min reduction: the compare operands are swapped and the predicate is
; flipped (sgt instead of slt), while the select picks the same values.
; CHECK-LABEL: @min_red_inverse_select(
; CHECK: icmp sgt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
; CHECK: select i1

define i32 @min_red_inverse_select(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp sgt i32 %acc, %elt
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; Unsigned tests.

; Unsigned max reduction over @A, seeded with %max.
; CHECK-LABEL: @umax_red(
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>
; CHECK: select i1

define i32 @umax_red(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp ugt i32 %elt, %acc
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; Still an unsigned max reduction: the compare operands are swapped and the
; predicate is flipped (ult instead of ugt), while the select picks the same
; values.
; CHECK-LABEL: @umax_red_inverse_select(
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>
; CHECK: select i1

define i32 @umax_red_inverse_select(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp ult i32 %acc, %elt
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; Unsigned min reduction over @A, seeded with the function argument.
; CHECK-LABEL: @umin_red(
; CHECK: icmp ult <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
; CHECK: select i1

define i32 @umin_red(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp ult i32 %elt, %acc
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; Still an unsigned min reduction: the compare operands are swapped and the
; predicate is flipped (ugt instead of ult), while the select picks the same
; values.
; CHECK-LABEL: @umin_red_inverse_select(
; CHECK: icmp ugt <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
; CHECK: select i1

define i32 @umin_red_inverse_select(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp ugt i32 %acc, %elt
  %acc.next = select i1 %cmp, i32 %elt, i32 %acc
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; SGE -> SLT
; Min reduction written with an sge compare and swapped select operands: the
; select keeps the accumulator when the loaded element is >= it.
; CHECK-LABEL: @sge_min_red(
; CHECK: icmp sge <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp slt <2 x i32>
; CHECK: select i1

define i32 @sge_min_red(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp sge i32 %elt, %acc
  %acc.next = select i1 %cmp, i32 %acc, i32 %elt
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; SLE -> SGT
; Max reduction written with an sle compare and swapped select operands: the
; select keeps the accumulator when the loaded element is <= it.
; CHECK-LABEL: @sle_min_red(
; CHECK: icmp sle <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp sgt <2 x i32>
; CHECK: select i1

; NOTE: despite the "_min_" in its name, this function selects the larger of
; the accumulator and the loaded element, i.e. it is a max reduction.
define i32 @sle_min_red(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp sle i32 %elt, %acc
  %acc.next = select i1 %cmp, i32 %acc, i32 %elt
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; UGE -> ULT
; Unsigned min reduction written with a uge compare and swapped select
; operands: the select keeps the accumulator when the loaded element is >= it.
; CHECK-LABEL: @uge_min_red(
; CHECK: icmp uge <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ult <2 x i32>
; CHECK: select i1

define i32 @uge_min_red(i32 %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi i32 [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv
  %elt = load i32, i32* %elt.addr, align 4
  %cmp = icmp uge i32 %elt, %acc
  %acc.next = select i1 %cmp, i32 %acc, i32 %elt
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret i32 %acc.next
}

; ULE -> UGT
; Unsigned max reduction written with a ule compare and swapped select
; operands: the select keeps the accumulator when the loaded element is <= it.
; CHECK-LABEL: @ule_min_red(
; CHECK: icmp ule <2 x i32>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: icmp ugt <2 x i32>
; CHECK: select i1

; NOTE: despite the "_min_" in its name, this function selects the larger of
; the accumulator and the loaded element, i.e. it is an unsigned max reduction.
define i32 @ule_min_red(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp3 = icmp ule i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; No reduction.
; The compare is between two loaded values (%0 and %1) and does not involve the
; recurrence phi, so the select is not a min/max pattern. No <2 x i32> value of
; any kind may appear in this function. (A pattern of the form
; "icmp <2 x i32>" could never match, because printed IR always has a predicate
; between the opcode and the type.)
; CHECK-LABEL: @no_red_1(
; CHECK-NOT: <2 x i32>
define i32 @no_red_1(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %1 = load i32, i32* %arrayidx1, align 4
  %cmp3 = icmp sgt i32 %0, %1
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; The compare uses the recurrence phi, but the select's false operand is a
; second loaded value (%1) instead of the phi, so this is not a min/max pattern
; either. No <2 x i32> value of any kind may appear in this function.
; CHECK-LABEL: @no_red_2(
; CHECK-NOT: <2 x i32>
define i32 @no_red_2(i32 %max) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %1 = load i32, i32* %arrayidx1, align 4
  %cmp3 = icmp sgt i32 %0, %max.red.08
  %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %max.red.0
}

; Float tests.

; Maximum.

; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @max_red_float(
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ogt float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max reduction written with oge; the compare checked after middle.block is ogt.
; CHECK-LABEL: @max_red_float_ge(
; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @max_red_float_ge(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast oge float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max reduction written with olt and swapped select operands.
; CHECK-LABEL: @inverted_max_red_float(
; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @inverted_max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast olt float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max reduction written with ole and swapped select operands.
; CHECK-LABEL: @inverted_max_red_float_le(
; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @inverted_max_red_float_le(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ole float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max reduction written with the unordered predicate ugt. The vector-loop
; compare is expected to be printed with the inverse ordered predicate (ole).
; CHECK-LABEL: @unordered_max_red_float(
; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @unordered_max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ugt float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max reduction written with the unordered predicate uge. The vector-loop
; compare is expected to be printed with the inverse ordered predicate (olt).
; CHECK-LABEL: @unordered_max_red_float_ge(
; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @unordered_max_red_float_ge(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast uge float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max reduction written with the unordered predicate ult and swapped select
; operands. The vector-loop compare is expected to be printed as oge.
; CHECK-LABEL: @inverted_unordered_max_red_float(
; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @inverted_unordered_max_red_float(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ult float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Max reduction written with the unordered predicate ule and swapped select
; operands. The vector-loop compare is expected to be printed as ogt.
; CHECK-LABEL: @inverted_unordered_max_red_float_le(
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1

define float @inverted_unordered_max_red_float_le(float %max) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ule float %0, %max.red.08
  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %max.red.0
}

; Minimum.

; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @min_red_float(
; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast olt float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min reduction written with ole; the compare checked after middle.block is olt.
; CHECK-LABEL: @min_red_float_le(
; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @min_red_float_le(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ole float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min reduction written with ogt and swapped select operands.
; CHECK-LABEL: @inverted_min_red_float(
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @inverted_min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ogt float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min reduction written with oge and swapped select operands.
; CHECK-LABEL: @inverted_min_red_float_ge(
; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @inverted_min_red_float_ge(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast oge float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min reduction written with the unordered predicate ult. The vector-loop
; compare is expected to be printed with the inverse ordered predicate (oge).
; CHECK-LABEL: @unordered_min_red_float(
; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @unordered_min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ult float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min reduction written with the unordered predicate ule. The vector-loop
; compare is expected to be printed with the inverse ordered predicate (ogt).
; CHECK-LABEL: @unordered_min_red_float_le(
; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @unordered_min_red_float_le(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ule float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min reduction written with the unordered predicate ugt and swapped select
; operands. The vector-loop compare is expected to be printed as ole.
; CHECK-LABEL: @inverted_unordered_min_red_float(
; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @inverted_unordered_min_red_float(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast ugt float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Min reduction written with the unordered predicate uge and swapped select
; operands. The vector-loop compare is expected to be printed as olt.
; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1

define float @inverted_unordered_min_red_float_ge(float %min) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %cmp3 = fcmp fast uge float %0, %min.red.08
  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret float %min.red.0
}

; Make sure we handle doubles, too.
; CHECK-LABEL: @min_red_double(
; CHECK: fcmp fast olt <2 x double>
; CHECK: select <2 x i1>
; CHECK: middle.block
; CHECK: fcmp fast olt <2 x double>
; CHECK: select i1

define double @min_red_double(double %min) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi double [ %min, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %iv
  %elt = load double, double* %elt.addr, align 4
  %cmp = fcmp fast olt double %elt, %acc
  %acc.next = select i1 %cmp, double %elt, double %acc
  %iv.next = add i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret double %acc.next
}


; Don't turn this into a max reduction: the function does not carry the
; no-nans-fp-math attribute (it is not tagged with attribute group #0).
; CHECK-LABEL: @max_red_float_nans(
; CHECK-NOT: <2 x float>

define float @max_red_float_nans(float %max) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi float [ %max, %entry ], [ %acc.next, %loop ]
  %elt.addr = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %iv
  %elt = load float, float* %elt.addr, align 4
  %cmp = fcmp fast ogt float %elt, %acc
  %acc.next = select i1 %cmp, float %elt, float %acc
  %iv.next = add i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret float %acc.next
}


attributes #0 = { "no-nans-fp-math"="true" }