; RUN: llc < %s -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s

; Codegen test for a loop nest whose bodies contain `atomicrmw min` operations.
;
; Shape of @test:
;   loop.cond / test.exit     - outer loop; runs until %iv equals %c.
;   vector_kernel_entry.i     - inner loop handling 8 x i32 per iteration,
;                               counting %asr.iv down to zero.
;   scalar_kernel_entry.i     - inner loop handling one i32 per iteration for
;                               the elements the vector loop did not cover.
;
; The FileCheck directives at the bottom of the file require that, after the
; first `decq` in the function, no `cmpxchgl` appears before the next `jne`.

define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
entry:
  ; %a[0] is the destination of every atomic min; %a[1] is the base of the
  ; i32 data that the loops read and overwrite.
  %ptrtoarg4 = load i8*, i8** %a, align 8
  %brglist1 = getelementptr i8*, i8** %a, i64 1
  %ptrtoarg25 = load i8*, i8** %brglist1, align 8
  ; Initial outer induction value (*%b) and the matching byte offset (*%b * 4)
  ; into the data buffer.
  %0 = load i64, i64* %b, align 8
  %1 = mul i64 %0, 4
  %scevgep = getelementptr i8, i8* %ptrtoarg25, i64 %1
  ; Per-outer-iteration pointer step, consumed in test.exit.
  %2 = mul i64 %d, 4
  br label %loop.cond

loop.cond:                                        ; preds = %test.exit, %entry
  %asr.iv6 = phi i8* [ %29, %test.exit ], [ %scevgep, %entry ]
  %iv = phi i64 [ %0, %entry ], [ %28, %test.exit ]
  ; Leave the outer loop once %iv reaches %c.
  %3 = icmp eq i64 %iv, %c
  br i1 %3, label %return, label %loop

loop:                                             ; preds = %loop.cond
  ; Fetch the element count %5 through a pointer stored at address 264 in
  ; addrspace(256).
  %4 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
  %5 = load i64, i64* %4, align 8
  ; %vector.size.i   = number of full 8-element groups (%5 >> 3).
  ; %num.vector.wi.i = number of elements covered by those groups.
  %vector.size.i = ashr i64 %5, 3
  %num.vector.wi.i = shl i64 %vector.size.i, 3
  ; Skip the vector loop entirely when there is no full group.
  %6 = icmp eq i64 %vector.size.i, 0
  br i1 %6, label %scalarIf.i, label %dim_0_vector_pre_head.i

dim_0_vector_pre_head.i:                          ; preds = %loop
  ; Splat the low 32 bits of %5 across an <8 x i32>; the vector loop stores
  ; this value over each group it has processed.
  %7 = trunc i64 %5 to i32
  %tempvector_func.i = insertelement <8 x i32> undef, i32 %7, i32 0
  %vectorvector_func.i = shufflevector <8 x i32> %tempvector_func.i, <8 x i32> undef, <8 x i32> zeroinitializer
  br label %vector_kernel_entry.i

vector_kernel_entry.i:                            ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i
  ; %asr.iv9 walks the data 32 bytes at a time; %asr.iv counts the remaining
  ; groups down to zero.
  %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ]
  %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
  %8 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
  %asr.iv911 = addrspacecast i8* %asr.iv9 to <8 x i32> addrspace(1)*
  %9 = load <8 x i32>, <8 x i32> addrspace(1)* %asr.iv911, align 4
  ; Pull the eight lanes apart so each one can feed its own atomic min.
  %extract8vector_func.i = extractelement <8 x i32> %9, i32 0
  %extract9vector_func.i = extractelement <8 x i32> %9, i32 1
  %extract10vector_func.i = extractelement <8 x i32> %9, i32 2
  %extract11vector_func.i = extractelement <8 x i32> %9, i32 3
  %extract12vector_func.i = extractelement <8 x i32> %9, i32 4
  %extract13vector_func.i = extractelement <8 x i32> %9, i32 5
  %extract14vector_func.i = extractelement <8 x i32> %9, i32 6
  %extract15vector_func.i = extractelement <8 x i32> %9, i32 7
  ; Eight sequentially consistent atomic mins against the same location %8.
  %10 = atomicrmw min i32 addrspace(1)* %8, i32 %extract8vector_func.i seq_cst
  %11 = atomicrmw min i32 addrspace(1)* %8, i32 %extract9vector_func.i seq_cst
  %12 = atomicrmw min i32 addrspace(1)* %8, i32 %extract10vector_func.i seq_cst
  %13 = atomicrmw min i32 addrspace(1)* %8, i32 %extract11vector_func.i seq_cst
  %14 = atomicrmw min i32 addrspace(1)* %8, i32 %extract12vector_func.i seq_cst
  %15 = atomicrmw min i32 addrspace(1)* %8, i32 %extract13vector_func.i seq_cst
  %16 = atomicrmw min i32 addrspace(1)* %8, i32 %extract14vector_func.i seq_cst
  %17 = atomicrmw min i32 addrspace(1)* %8, i32 %extract15vector_func.i seq_cst
  ; Overwrite the group just read with the splat built in the pre-header.
  store <8 x i32> %vectorvector_func.i, <8 x i32> addrspace(1)* %asr.iv911, align 4
  ; Count down and advance; this decrement-and-branch is the loop latch.
  %asr.iv.next = add i64 %asr.iv, -1
  %scevgep10 = getelementptr i8, i8* %asr.iv9, i64 32
  %dim_0_vector_cmp.to.max.i = icmp eq i64 %asr.iv.next, 0
  br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %vector_kernel_entry.i

scalarIf.i:                                       ; preds = %vector_kernel_entry.i, %loop
  ; %exec_wi.i = elements already handled: 0 when the vector loop was skipped,
  ; otherwise %num.vector.wi.i. Nothing is left when it equals %5.
  %exec_wi.i = phi i64 [ 0, %loop ], [ %num.vector.wi.i, %vector_kernel_entry.i ]
  %18 = icmp eq i64 %exec_wi.i, %5
  br i1 %18, label %test.exit, label %dim_0_pre_head.i

dim_0_pre_head.i:                                 ; preds = %scalarIf.i
  ; Reload the count from the same address used for %4/%5 and truncate it to
  ; the i32 that the scalar loop stores.
  %19 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
  %20 = load i64, i64* %19, align 8
  %21 = trunc i64 %20 to i32
  ; First scalar index: %vector.size.i * 8, the same quantity %num.vector.wi.i
  ; computes with a shift.
  %22 = mul i64 %vector.size.i, 8
  br label %scalar_kernel_entry.i

scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i
  ; %asr.iv12 is the i32 element index, counting up from %22 until it
  ; reaches %5.
  %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ]
  %23 = addrspacecast i8* %asr.iv6 to i32 addrspace(1)*
  %24 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
  ; Load one element, fold it into the shared minimum, then overwrite it
  ; with %21.
  %scevgep16 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
  %25 = load i32, i32 addrspace(1)* %scevgep16, align 4
  %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
  %scevgep15 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
  store i32 %21, i32 addrspace(1)* %scevgep15, align 4
  %asr.iv.next13 = add i64 %asr.iv12, 1
  %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
  br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i

test.exit:                                        ; preds = %scalar_kernel_entry.i, %scalarIf.i
  ; Outer-loop latch: bump %iv by %d, publish it through %b, and step the data
  ; pointer by %2 (= 4 * %d) i1 elements.
  %27 = bitcast i8* %asr.iv6 to i1*
  %28 = add i64 %iv, %d
  store i64 %28, i64* %b, align 8
  %scevgep8 = getelementptr i1, i1* %27, i64 %2
  %29 = bitcast i1* %scevgep8 to i8*
  br label %loop.cond

return:                                           ; preds = %loop.cond
  ; Write the value originally loaded from %b back before returning.
  store i64 %0, i64* %b, align 8
  ret void
}

; CHECK-LABEL: test:
; CHECK: decq
; CHECK-NOT: cmpxchgl
; CHECK: jne
; CHECK: ret