; RUN: opt < %s -analyze -block-freq | FileCheck %s

; This code contains three loops. One is triple-nested, the
; second is double-nested and the third is a single loop. At
; runtime, all three loops execute 1,000,000 times each. We used to
; give different frequencies to each of the loops because loop
; scales were limited to no more than 4,096.
;
; This was penalizing the hotness of the second and third loops
; because BFI was reducing the loop scale for for.cond16 and
; for.cond26 to a max of 4,096.
;
; Without this restriction, all loops are now correctly given the same
; frequency values.
;
; Original C code:
;
;
; int g;
; __attribute__((noinline)) void bar() {
;   g++;
; }
;
; extern int printf(const char*, ...);
;
; int main()
; {
;   int i, j, k;
;
;   g = 0;
;   for (i = 0; i < 100; i++)
;     for (j = 0; j < 100; j++)
;       for (k = 0; k < 100; k++)
;         bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
;
;   for (i = 0; i < 100; i++)
;     for (j = 0; j < 10000; j++)
;       bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
;
;
;   for (i = 0; i < 1000000; i++)
;     bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
; }

; Counter from the C source above; @main resets it to 0 before each loop nest.
@g = common global i32 0, align 4
; Format string "g = %d\n" (NUL-terminated, 8 bytes) passed to @printf.
@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1

; @bar is only declared in this module: the test needs the call sites inside
; the loop bodies, not the callee's body.
declare void @bar()
declare i32 @printf(i8*, ...)
; @main holds three consecutive loop nests that each call @bar. The FileCheck
; directives placed before for.body6, for.body18 and for.body28 expect those
; three blocks to be reported with the same block frequency.
;
; The two directives below match the header of the analysis printout.
; CHECK: Printing analysis {{.*}} for function 'main':
; CHECK-NEXT: block-frequency-info: main
define i32 @main() {
entry:
  ; Stack slots for the return value and the three loop counters.
  %retval = alloca i32, align 4
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %k = alloca i32, align 4
  store i32 0, i32* %retval
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

; ---- First nest: i < 100, j < 100, k < 100 ----

; Outer loop header (i < 100).
for.cond:                                         ; preds = %for.inc10, %entry
  %0 = load i32, i32* %i, align 4
  %cmp = icmp slt i32 %0, 100
  br i1 %cmp, label %for.body, label %for.end12, !prof !1

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond1

; Middle loop header (j < 100).
for.cond1:                                        ; preds = %for.inc7, %for.body
  %1 = load i32, i32* %j, align 4
  %cmp2 = icmp slt i32 %1, 100
  br i1 %cmp2, label %for.body3, label %for.end9, !prof !2

for.body3:                                        ; preds = %for.cond1
  store i32 0, i32* %k, align 4
  br label %for.cond4

; Innermost loop header (k < 100).
for.cond4:                                        ; preds = %for.inc, %for.body3
  %2 = load i32, i32* %k, align 4
  %cmp5 = icmp slt i32 %2, 100
  br i1 %cmp5, label %for.body6, label %for.end, !prof !3

; Body of the triple-nested loop.
; CHECK: - for.body6: float = 500000.5, int = 4000003
for.body6:                                        ; preds = %for.cond4
  call void @bar()
  br label %for.inc

for.inc:                                          ; preds = %for.body6
  %3 = load i32, i32* %k, align 4
  %inc = add nsw i32 %3, 1
  store i32 %inc, i32* %k, align 4
  br label %for.cond4

for.end:                                          ; preds = %for.cond4
  br label %for.inc7

for.inc7:                                         ; preds = %for.end
  %4 = load i32, i32* %j, align 4
  %inc8 = add nsw i32 %4, 1
  store i32 %inc8, i32* %j, align 4
  br label %for.cond1

for.end9:                                         ; preds = %for.cond1
  br label %for.inc10

for.inc10:                                        ; preds = %for.end9
  %5 = load i32, i32* %i, align 4
  %inc11 = add nsw i32 %5, 1
  store i32 %inc11, i32* %i, align 4
  br label %for.cond

; Exit of the first nest: print @g, then reset @g and %i for the second nest.
for.end12:                                        ; preds = %for.cond
  %6 = load i32, i32* @g, align 4
  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond13

; ---- Second nest: i < 100, j < 10000 ----

; Outer loop header (i < 100).
for.cond13:                                       ; preds = %for.inc22, %for.end12
  %7 = load i32, i32* %i, align 4
  %cmp14 = icmp slt i32 %7, 100
  br i1 %cmp14, label %for.body15, label %for.end24, !prof !1

for.body15:                                       ; preds = %for.cond13
  store i32 0, i32* %j, align 4
  br label %for.cond16

; Inner loop header (j < 10000).
for.cond16:                                       ; preds = %for.inc19, %for.body15
  %8 = load i32, i32* %j, align 4
  %cmp17 = icmp slt i32 %8, 10000
  br i1 %cmp17, label %for.body18, label %for.end21, !prof !4

; Body of the double-nested loop.
; CHECK: - for.body18: float = 500000.5, int = 4000003
for.body18:                                       ; preds = %for.cond16
  call void @bar()
  br label %for.inc19

for.inc19:                                        ; preds = %for.body18
  %9 = load i32, i32* %j, align 4
  %inc20 = add nsw i32 %9, 1
  store i32 %inc20, i32* %j, align 4
  br label %for.cond16

for.end21:                                        ; preds = %for.cond16
  br label %for.inc22

for.inc22:                                        ; preds = %for.end21
  %10 = load i32, i32* %i, align 4
  %inc23 = add nsw i32 %10, 1
  store i32 %inc23, i32* %i, align 4
  br label %for.cond13

; Exit of the second nest: print @g, then reset @g and %i for the third loop.
for.end24:                                        ; preds = %for.cond13
  %11 = load i32, i32* @g, align 4
  %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond26

; ---- Third loop: i < 1000000 ----

; Loop header (i < 1000000).
for.cond26:                                       ; preds = %for.inc29, %for.end24
  %12 = load i32, i32* %i, align 4
  %cmp27 = icmp slt i32 %12, 1000000
  br i1 %cmp27, label %for.body28, label %for.end31, !prof !5

; Body of the single loop.
; CHECK: - for.body28: float = 500000.5, int = 4000003
for.body28:                                       ; preds = %for.cond26
  call void @bar()
  br label %for.inc29

for.inc29:                                        ; preds = %for.body28
  %13 = load i32, i32* %i, align 4
  %inc30 = add nsw i32 %13, 1
  store i32 %inc30, i32* %i, align 4
  br label %for.cond26

; Final exit: print @g, reset it, and return the value held in %retval
; (set to 0 in the entry block and never written again).
for.end31:                                        ; preds = %for.cond26
  %14 = load i32, i32* @g, align 4
  %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
  store i32 0, i32* @g, align 4
  %15 = load i32, i32* %retval
  ret i32 %15
}

!llvm.ident = !{!0}

!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
; Branch weights for the loop-header branches above. In every use the first
; weight belongs to the first successor (the loop body) and the second to the
; loop-exit successor.
; NOTE(review): the values look like execution counts plus one (e.g. 101 for a
; branch taken 100 times) - confirm against the profile that produced them.
;   !1: outer headers for.cond and for.cond13 (i < 100)
;   !2: for.cond1 (j < 100)
;   !3: for.cond4 (k < 100)
;   !4: for.cond16 (j < 10000)
;   !5: for.cond26 (i < 1000000)
!1 = !{!"branch_weights", i32 101, i32 2}
!2 = !{!"branch_weights", i32 10001, i32 101}
!3 = !{!"branch_weights", i32 1000001, i32 10001}
!4 = !{!"branch_weights", i32 1000001, i32 101}
!5 = !{!"branch_weights", i32 1000001, i32 2}