1 /******************************************************************************/ 2 /* */ 3 /* Paul Mackerras <paulus (at) samba.org>, 2009 */ 4 /* */ 5 /* This program is free software; you can redistribute it and/or modify */ 6 /* it under the terms of the GNU General Public License as published by */ 7 /* the Free Software Foundation; either version 2 of the License, or */ 8 /* (at your option) any later version. */ 9 /* */ 10 /* This program is distributed in the hope that it will be useful, */ 11 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ 12 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */ 13 /* the GNU General Public License for more details. */ 14 /* */ 15 /* You should have received a copy of the GNU General Public License */ 16 /* along with this program; if not, write to the Free Software */ 17 /* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ 18 /* */ 19 /******************************************************************************/ 20 /* 21 Here's a little test program that checks whether software counters 22 (specifically, the task clock counter) work correctly when they're in 23 a group with hardware counters. 24 25 What it does is to create several groups, each with one hardware 26 counter, counting instructions, plus a task clock counter. It needs 27 to know an upper bound N on the number of hardware counters you have 28 (N defaults to 8), and it creates N+4 groups to force them to be 29 multiplexed. It also creates an overall task clock counter. 30 31 Then it spins for a while, and then stops all the counters and reads 32 them. It takes the total of the task clock counters in the groups and 33 computes the ratio of that total to the overall execution time from 34 the overall task clock counter. 35 36 That ratio should be equal to the number of actual hardware counters 37 that can count instructions. If the task clock counters in the groups 38 don't stop when their group gets taken off the PMU, the ratio will 39 instead be close to N+4. The program will declare that the test fails 40 if the ratio is greater than N (actually, N + 0.0001 to allow for FP 41 rounding errors). 42 43 Could someone run this on x86 on the latest PCL tree and let me know 44 what happens? I don't have an x86 crash box easily to hand. On 45 powerpc, it passes, but I think that is because I am missing setting 46 counter->prev_count in arch/powerpc/kernel/perf_counter.c, and I think 47 that means that enabling/disabling a group with a task clock counter 48 in it won't work correctly (I'll do a test program for that next). 49 50 Usage is: ./performance_counter02 [-v] 51 52 The -v flag makes it print out the values of each counter. 53 */ 54 55 #include <stdio.h> 56 #include <stddef.h> 57 #include <stdlib.h> 58 #include <string.h> 59 #include <fcntl.h> 60 #include <poll.h> 61 #include <unistd.h> 62 #include <errno.h> 63 #include "config.h" 64 #include <sys/prctl.h> 65 #include <sys/types.h> 66 #include <linux/types.h> 67 68 #if HAVE_PERF_EVENT_ATTR 69 # include <linux/perf_event.h> 70 #endif 71 72 #include "test.h" 73 #include "safe_macros.h" 74 #include "lapi/syscalls.h" 75 76 char *TCID = "perf_event_open02"; 77 int TST_TOTAL = 1; 78 79 #if HAVE_PERF_EVENT_ATTR 80 81 #define MAX_CTRS 1000 82 #define LOOPS 100000000 83 84 static int count_hardware_counters(void); 85 static void setup(void); 86 static void verify(void); 87 static void cleanup(void); 88 static void help(void); 89 90 static int n, nhw; 91 static int verbose; 92 static option_t options[] = { 93 {"v", &verbose, NULL}, 94 {NULL, NULL, NULL}, 95 }; 96 97 static int tsk0; 98 static int hwfd[MAX_CTRS], tskfd[MAX_CTRS]; 99 100 int main(int ac, char **av) 101 { 102 int lc; 103 104 tst_parse_opts(ac, av, options, help); 105 106 setup(); 107 108 for (lc = 0; TEST_LOOPING(lc); lc++) { 109 tst_count = 0; 110 verify(); 111 } 112 113 cleanup(); 114 tst_exit(); 115 } 116 117 static int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, 118 int cpu, int group_fd, unsigned long flags) 119 { 120 int ret; 121 122 ret = ltp_syscall(__NR_perf_event_open, hw_event, pid, cpu, 123 group_fd, flags); 124 return ret; 125 } 126 127 128 static void do_work(void) 129 { 130 int i; 131 132 for (i = 0; i < LOOPS; ++i) 133 asm volatile (""::"g" (i)); 134 } 135 136 struct read_format { 137 unsigned long long value; 138 /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ 139 unsigned long long time_enabled; 140 /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ 141 unsigned long long time_running; 142 }; 143 144 static int count_hardware_counters(void) 145 { 146 struct perf_event_attr hw_event; 147 int i, hwctrs = 0; 148 int fdarry[MAX_CTRS]; 149 struct read_format buf; 150 151 memset(&hw_event, 0, sizeof(struct perf_event_attr)); 152 153 hw_event.type = PERF_TYPE_HARDWARE; 154 hw_event.size = sizeof(struct perf_event_attr); 155 hw_event.disabled = 1; 156 hw_event.config = PERF_COUNT_HW_INSTRUCTIONS; 157 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 158 PERF_FORMAT_TOTAL_TIME_RUNNING; 159 160 for (i = 0; i < MAX_CTRS; i++) { 161 fdarry[i] = perf_event_open(&hw_event, 0, -1, -1, 0); 162 if (fdarry[i] == -1) { 163 if (errno == ENOENT || errno == ENODEV) { 164 tst_brkm(TCONF | TERRNO, cleanup, 165 "PERF_COUNT_HW_INSTRUCTIONS not supported"); 166 } 167 tst_brkm(TBROK | TERRNO, cleanup, 168 "perf_event_open failed at iteration:%d", i); 169 } 170 171 if (prctl(PR_TASK_PERF_EVENTS_ENABLE) == -1) { 172 tst_brkm(TBROK | TERRNO, cleanup, 173 "prctl(PR_TASK_PERF_EVENTS_ENABLE) failed"); 174 } 175 176 do_work(); 177 178 if (prctl(PR_TASK_PERF_EVENTS_DISABLE) == -1) { 179 tst_brkm(TBROK | TERRNO, cleanup, 180 "prctl(PR_TASK_PERF_EVENTS_DISABLE) failed"); 181 } 182 183 if (read(fdarry[i], &buf, sizeof(buf)) != sizeof(buf)) { 184 tst_brkm(TBROK | TERRNO, cleanup, 185 "error reading counter(s)"); 186 } 187 188 if (verbose == 1) { 189 printf("at iteration:%d value:%lld time_enabled:%lld " 190 "time_running:%lld\n", i, buf.value, 191 buf.time_enabled, buf.time_running); 192 } 193 194 /* 195 * Normally time_enabled and time_running are the same value. 196 * But if more events are started than available counter slots 197 * on the PMU, then multiplexing happens and events run only 198 * part of the time. Time_enabled and time_running's values 199 * will be different. In this case the time_enabled and time_ 200 * running values can be used to scale an estimated value for 201 * the count. So if buf.time_enabled and buf.time_running are 202 * not equal, we can think that PMU hardware counters 203 * multiplexing happens and the number of the opened events 204 * are the number of max available hardware counters. 205 */ 206 if (buf.time_enabled != buf.time_running) { 207 hwctrs = i; 208 break; 209 } 210 } 211 212 for (i = 0; i <= hwctrs; i++) 213 SAFE_CLOSE(cleanup, fdarry[i]); 214 215 return hwctrs; 216 } 217 218 static void setup(void) 219 { 220 int i; 221 struct perf_event_attr tsk_event, hw_event; 222 223 /* 224 * According to perf_event_open's manpage, the official way of 225 * knowing if perf_event_open() support is enabled is checking for 226 * the existence of the file /proc/sys/kernel/perf_event_paranoid. 227 */ 228 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1) 229 tst_brkm(TCONF, NULL, "Kernel doesn't have perf_event support"); 230 231 tst_sig(NOFORK, DEF_HANDLER, cleanup); 232 233 TEST_PAUSE; 234 235 nhw = count_hardware_counters(); 236 n = nhw + 4; 237 238 memset(&hw_event, 0, sizeof(struct perf_event_attr)); 239 memset(&tsk_event, 0, sizeof(struct perf_event_attr)); 240 241 tsk_event.type = PERF_TYPE_SOFTWARE; 242 tsk_event.size = sizeof(struct perf_event_attr); 243 tsk_event.disabled = 1; 244 tsk_event.config = PERF_COUNT_SW_TASK_CLOCK; 245 246 hw_event.type = PERF_TYPE_HARDWARE; 247 hw_event.size = sizeof(struct perf_event_attr); 248 hw_event.disabled = 1; 249 hw_event.config = PERF_COUNT_HW_INSTRUCTIONS; 250 251 tsk0 = perf_event_open(&tsk_event, 0, -1, -1, 0); 252 if (tsk0 == -1) { 253 tst_brkm(TBROK | TERRNO, cleanup, "perf_event_open failed"); 254 } else { 255 tsk_event.disabled = 0; 256 for (i = 0; i < n; ++i) { 257 hwfd[i] = perf_event_open(&hw_event, 0, -1, -1, 0); 258 tskfd[i] = perf_event_open(&tsk_event, 0, -1, 259 hwfd[i], 0); 260 if (tskfd[i] == -1 || hwfd[i] == -1) { 261 tst_brkm(TBROK | TERRNO, cleanup, 262 "perf_event_open failed"); 263 } 264 } 265 } 266 } 267 268 static void cleanup(void) 269 { 270 int i; 271 272 for (i = 0; i < n; i++) { 273 if (hwfd[i] > 0 && close(hwfd[i]) == -1) 274 tst_resm(TWARN | TERRNO, "close(%d) failed", hwfd[i]); 275 if (tskfd[i] > 0 && close(tskfd[i]) == -1) 276 tst_resm(TWARN | TERRNO, "close(%d) failed", tskfd[i]); 277 } 278 279 if (tsk0 > 0 && close(tsk0) == -1) 280 tst_resm(TWARN | TERRNO, "close(%d) failed", tsk0); 281 } 282 283 static void verify(void) 284 { 285 unsigned long long vt0, vt[MAX_CTRS], vh[MAX_CTRS]; 286 unsigned long long vtsum = 0, vhsum = 0; 287 int i; 288 double ratio; 289 290 if (prctl(PR_TASK_PERF_EVENTS_ENABLE) == -1) { 291 tst_brkm(TBROK | TERRNO, cleanup, 292 "prctl(PR_TASK_PERF_EVENTS_ENABLE) failed"); 293 } 294 295 do_work(); 296 297 if (prctl(PR_TASK_PERF_EVENTS_DISABLE) == -1) { 298 tst_brkm(TBROK | TERRNO, cleanup, 299 "prctl(PR_TASK_PERF_EVENTS_DISABLE) failed"); 300 } 301 302 if (read(tsk0, &vt0, sizeof(vt0)) != sizeof(vt0)) { 303 tst_brkm(TBROK | TERRNO, cleanup, 304 "error reading task clock counter"); 305 } 306 307 for (i = 0; i < n; ++i) { 308 if (read(tskfd[i], &vt[i], sizeof(vt[i])) != sizeof(vt[i]) || 309 read(hwfd[i], &vh[i], sizeof(vh[i])) != sizeof(vh[i])) { 310 tst_brkm(TBROK | TERRNO, cleanup, 311 "error reading counter(s)"); 312 } 313 vtsum += vt[i]; 314 vhsum += vh[i]; 315 } 316 317 tst_resm(TINFO, "overall task clock: %llu", vt0); 318 tst_resm(TINFO, "hw sum: %llu, task clock sum: %llu", vhsum, vtsum); 319 320 if (verbose == 1) { 321 printf("hw counters:"); 322 for (i = 0; i < n; ++i) 323 printf(" %llu", vh[i]); 324 printf("\ntask clock counters:"); 325 for (i = 0; i < n; ++i) 326 printf(" %llu", vt[i]); 327 printf("\n"); 328 } 329 330 ratio = (double)vtsum / vt0; 331 tst_resm(TINFO, "ratio: %lf", ratio); 332 if (ratio > nhw + 0.0001) { 333 tst_resm(TFAIL, "test failed (ratio was greater than )"); 334 } else { 335 tst_resm(TPASS, "test passed"); 336 } 337 } 338 339 static void help(void) 340 { 341 printf(" -v Print verbose information\n"); 342 } 343 344 #else 345 346 int main(void) 347 { 348 tst_brkm(TCONF, NULL, "This system doesn't have " 349 "header file:<linux/perf_event.h> or " 350 "no struct perf_event_attr defined"); 351 } 352 #endif 353