Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck %s
      2 
      3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
      4 ;; This test checks that x86-cmov-converter optimization does not transform CMOV
      5 ;; instruction when the gain (in cycles) of converting to branch is less than
      6 ;; a fixed threshold (measured for "-x86-cmov-converter-threshold=4").
      7 ;;
      8 ;; Test was created using the following command line:
      9 ;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
     10 ;; Where foo.c is:
     11 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     12 ;;int bar(int *a, int *b, int n) {
     13 ;;  int sum = 0;
     14 ;;  for (int i = 0; i < n; ++i) {
     15 ;;    int x = a[i] * a[i+1] * a[i+2];
     16 ;;    int y = b[i] * b[i+1];
     17 ;;    sum += y > x ? x : 0;
     18 ;;  }
     19 ;;  return sum;
     20 ;;}
     21 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     22 ;; Adding a test to the above function shows that the code with CMOV is 25%
     23 ;; faster than the code with a branch.
     24 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     25 ;;#define N 10000
     26 ;;int A[N];
     27 ;;int B[N];
     28 ;;
     29 ;;
     30 ;;
     31 ;;int main () {
     32 ;;  for (int i=0; i< N; ++i) {
     33 ;;    A[i] = i%4;
     34 ;;    B[i] = i%5;
     35 ;;  }
     36 ;;  int sum = 0;
     37 ;;  for (int i=0; i< N*10; ++i)
     38 ;;    sum += bar(A, B, N);
     39 ;;  return sum;
     40 ;;}
     41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     42 
     43 ; CHECK-NOT: jg
     44 ; CHECK: cmovle
     45 define i32 @bar(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %n) #0 { ; IR for the C function bar() shown in the header comment
     46 entry:
     47   %cmp30 = icmp sgt i32 %n, 0                     ; the loop is entered only when n > 0
     48   br i1 %cmp30, label %for.body.preheader, label %for.cond.cleanup
     49 
     50 for.body.preheader:                               ; preds = %entry
     51   %.pre = load i32, i32* %a, align 4              ; a[0]
     52   %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 1
     53   %.pre34 = load i32, i32* %arrayidx2.phi.trans.insert, align 4  ; a[1]
     54   %.pre35 = load i32, i32* %b, align 4            ; b[0]
     55   %wide.trip.count = zext i32 %n to i64           ; n zero-extended to i64 for the exit compare
     56   br label %for.body
     57 
     58 for.cond.cleanup:                                 ; preds = %for.body, %entry
     59   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add14, %for.body ]  ; 0 when the loop was skipped, otherwise the final sum
     60   ret i32 %sum.0.lcssa
     61 
     62 for.body:                                         ; preds = %for.body, %for.body.preheader
     63   %0 = phi i32 [ %.pre35, %for.body.preheader ], [ %5, %for.body ]  ; b[i]: b[0] on entry, then the previous iteration's b[i+1] load
     64   %1 = phi i32 [ %.pre34, %for.body.preheader ], [ %4, %for.body ]  ; a[i+1]: a[1] on entry, then the previous iteration's a[i+2] load
     65   %2 = phi i32 [ %.pre, %for.body.preheader ], [ %1, %for.body ]  ; a[i]: a[0] on entry, then the previous iteration's a[i+1]
     66   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]  ; i
     67   %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add14, %for.body ]  ; running sum
     68   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1  ; i + 1
     69   %mul = mul nsw i32 %1, %2                       ; a[i+1] * a[i]
     70   %3 = add nuw nsw i64 %indvars.iv, 2             ; i + 2
     71   %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %3
     72   %4 = load i32, i32* %arrayidx5, align 4         ; a[i+2]
     73   %mul6 = mul nsw i32 %mul, %4                    ; x = a[i] * a[i+1] * a[i+2]
     74   %arrayidx11 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
     75   %5 = load i32, i32* %arrayidx11, align 4        ; b[i+1]
     76   %mul12 = mul nsw i32 %5, %0                     ; y = b[i] * b[i+1]
     77   %cmp13 = icmp sgt i32 %mul12, %mul6             ; y > x
     78   %cond = select i1 %cmp13, i32 %mul6, i32 0      ; y > x ? x : 0 -- the only select here; the CHECK lines expect a cmovle and no jg
     79   %add14 = add nsw i32 %cond, %sum.032            ; sum += selected value
     80   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count  ; leave the loop once i + 1 == n
     81   br i1 %exitcond, label %for.cond.cleanup, label %for.body
     82 }
     83 
     84 attributes #0 = {"target-cpu"="skylake"}         ; attribute group #0, referenced by the definition of @bar
     85 
     86 !llvm.module.flags = !{!0, !1}                    ; module flags are the !0 and !1 nodes below
     87 !llvm.ident = !{!2}                               ; identification string is the !2 node below
     88 
     89 !0 = !{i32 1, !"wchar_size", i32 2}
     90 !1 = !{i32 7, !"PIC Level", i32 2}
     91 !2 = !{!"clang version 5.0.0 (trunk)"}
     92