1 char netcpu_kstat10_id[]="\ 2 @(#)netcpu_kstat10.c (c) Copyright 2005-2012, Hewlett-Packard Company Version 2.6.0"; 3 4 #if HAVE_CONFIG_H 5 # include <config.h> 6 #endif 7 8 #include <stdio.h> 9 10 #if HAVE_INTTYPES_H 11 # include <inttypes.h> 12 #else 13 # if HAVE_STDINT_H 14 # include <stdint.h> 15 # endif 16 #endif 17 18 #if HAVE_UNISTD_H 19 # include <unistd.h> 20 #endif 21 #if HAVE_STRINGS_H 22 # include <strings.h> 23 #endif 24 #if STDC_HEADERS 25 # include <stdlib.h> 26 # include <stddef.h> 27 #else 28 # if HAVE_STDLIB_H 29 # include <stdlib.h> 30 # endif 31 #endif 32 33 #include <errno.h> 34 35 #include <kstat.h> 36 #include <sys/sysinfo.h> 37 38 #include "netsh.h" 39 #include "netlib.h" 40 41 static kstat_ctl_t *kc = NULL; 42 static kid_t kcid = 0; 43 44 typedef struct cpu_time_counters { 45 uint64_t idle; 46 uint64_t user; 47 uint64_t kernel; 48 uint64_t interrupt; 49 } cpu_time_counters_t; 50 51 static cpu_time_counters_t starting_cpu_counters[MAXCPUS]; 52 static cpu_time_counters_t ending_cpu_counters[MAXCPUS]; 53 static cpu_time_counters_t delta_cpu_counters[MAXCPUS]; 54 static cpu_time_counters_t corrected_cpu_counters[MAXCPUS]; 55 56 static void 57 print_cpu_time_counters(char *name, int instance, cpu_time_counters_t *counters) 58 { 59 fprintf(where, 60 "%s[%d]:\n" 61 "\t idle %llu\n" 62 "\t user %llu\n" 63 "\t kernel %llu\n" 64 "\t interrupt %llu\n", 65 name,instance, 66 counters[instance].idle, 67 counters[instance].user, 68 counters[instance].kernel, 69 counters[instance].interrupt); 70 } 71 72 void 73 cpu_util_init(void) 74 { 75 kstat_t *ksp; 76 int i; 77 kc = kstat_open(); 78 79 if (kc == NULL) { 80 fprintf(where, 81 "cpu_util_init: kstat_open: errno %d %s\n", 82 errno, 83 strerror(errno)); 84 fflush(where); 85 exit(-1); 86 } 87 88 /* lets flesh-out a CPU instance number map since it seems that some 89 systems, not even those which are partitioned, can have 90 non-contiguous CPU numbers. discovered "the hard way" on a 91 T5220. raj 20080804 */ 92 i = 0; 93 for (ksp = kc->kc_chain, i = 0; 94 (ksp != NULL) && (i < MAXCPUS); 95 ksp = ksp->ks_next) { 96 if ((strcmp(ksp->ks_module,"cpu") == 0) && 97 (strcmp(ksp->ks_name,"sys") == 0)) { 98 if (debug) { 99 fprintf(where,"Mapping CPU instance %d to entry %d\n", 100 ksp->ks_instance,i); 101 fflush(where); 102 } 103 lib_cpu_map[i++] = ksp->ks_instance; 104 } 105 } 106 107 if (MAXCPUS == i) { 108 fprintf(where, 109 "Sorry, this system has more CPUs (%d) than netperf can handle (%d).\n" 110 "Please alter MAXCPUS in netlib.h and recompile.\n", 111 i, 112 MAXCPUS); 113 fflush(where); 114 exit(1); 115 } 116 117 return; 118 } 119 120 void 121 cpu_util_terminate(void) 122 { 123 kstat_close(kc); 124 return; 125 } 126 127 int 128 get_cpu_method(void) 129 { 130 return KSTAT_10; 131 } 132 133 static void 134 print_unexpected_statistic_warning(char *who, char *what, char *why) 135 { 136 if (why) { 137 fprintf(where, 138 "WARNING! WARNING! WARNING! WARNING!\n" 139 "%s found an unexpected %s statistic %.16s\n", 140 who, 141 why, 142 what); 143 } 144 else { 145 fprintf(where, 146 "%s is ignoring statistic %.16s\n", 147 who, 148 what); 149 } 150 } 151 152 static void 153 get_cpu_counters(int cpu_num, cpu_time_counters_t *counters) 154 { 155 156 kstat_t *ksp; 157 int found=0; 158 kid_t nkcid; 159 kstat_named_t *knp; 160 int i; 161 162 ksp = kstat_lookup(kc, "cpu", lib_cpu_map[cpu_num], "sys"); 163 if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) { 164 /* happiness and joy, keep going */ 165 nkcid = kstat_read(kc, ksp, NULL); 166 if (nkcid != -1) { 167 /* happiness and joy, keep going. we could consider adding a 168 "found < 3" to the end conditions, but then we wouldn't 169 search to the end and find that Sun added some nsec. we 170 probably want to see if they add an nsec. raj 2005-01-28 */ 171 for (i = ksp->ks_ndata, knp = ksp->ks_data; 172 i > 0; 173 knp++,i--) { 174 /* we would be hosed if the same name could appear twice */ 175 if (!strcmp("cpu_nsec_idle",knp->name)) { 176 found++; 177 counters[cpu_num].idle = knp->value.ui64; 178 } 179 else if (!strcmp("cpu_nsec_user",knp->name)) { 180 found++; 181 counters[cpu_num].user = knp->value.ui64; 182 } 183 else if (!strcmp("cpu_nsec_kernel",knp->name)) { 184 found++; 185 counters[cpu_num].kernel = knp->value.ui64; 186 } 187 else if (!strcmp("cpu_nsec_intr",knp->name)) { 188 if (debug >= 2) { 189 fprintf(where, 190 "Found a cpu_nsec_intr but it doesn't do what we want\n"); 191 fflush(where); 192 } 193 } 194 else if (strstr(knp->name,"nsec")) { 195 /* finding another nsec here means Sun have changed 196 something and we need to warn the user. raj 2005-01-28 */ 197 print_unexpected_statistic_warning("get_cpu_counters", 198 knp->name, 199 "nsec"); 200 } 201 else if (debug >=2) { 202 203 /* might want to tell people about what we are skipping. 204 however, only display other names debug >=2. raj 205 2005-01-28 */ 206 207 print_unexpected_statistic_warning("get_cpu_counters", 208 knp->name, 209 NULL); 210 } 211 } 212 if (3 == found) { 213 /* happiness and joy */ 214 return; 215 } 216 else { 217 fprintf(where, 218 "get_cpu_counters could not find one or more of the expected counters!\n"); 219 fflush(where); 220 exit(-1); 221 } 222 } 223 else { 224 /* the kstat_read returned an error or the chain changed */ 225 fprintf(where, 226 "get_cpu_counters: kstat_read failed or chain id changed %d %s\n", 227 errno, 228 strerror(errno)); 229 fflush(where); 230 exit(-1); 231 } 232 } 233 else { 234 /* the lookup failed or found the wrong type */ 235 fprintf(where, 236 "get_cpu_counters: kstat_lookup failed for module 'cpu' number %d instance %d name 'sys' and KSTAT_TYPE_NAMED: errno %d %s\n", 237 cpu_num, 238 lib_cpu_map[cpu_num], 239 errno, 240 strerror(errno)); 241 fflush(where); 242 exit(-1); 243 } 244 } 245 246 static void 247 get_interrupt_counters(int cpu_num, cpu_time_counters_t *counters) 248 { 249 kstat_t *ksp; 250 int found=0; 251 kid_t nkcid; 252 kstat_named_t *knp; 253 int i; 254 255 ksp = kstat_lookup(kc, "cpu", lib_cpu_map[cpu_num], "intrstat"); 256 257 counters[cpu_num].interrupt = 0; 258 if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) { 259 /* happiness and joy, keep going */ 260 nkcid = kstat_read(kc, ksp, NULL); 261 if (nkcid != -1) { 262 /* happiness and joy, keep going. we could consider adding a 263 "found < 15" to the end conditions, but then we wouldn't 264 search to the end and find that Sun added some "time." we 265 probably want to see if they add a "nsec." raj 2005-01-28 */ 266 for (i = ksp->ks_ndata, knp = ksp->ks_data; 267 i > 0; 268 knp++,i--) { 269 if (strstr(knp->name,"time")) { 270 found++; 271 counters[cpu_num].interrupt += knp->value.ui64; 272 } 273 else if (debug >=2) { 274 275 /* might want to tell people about what we are skipping. 276 however, only display other names debug >=2. raj 277 2005-01-28 278 */ 279 280 print_unexpected_statistic_warning("get_cpu_counters", 281 knp->name, 282 NULL); 283 } 284 } 285 if (15 == found) { 286 /* happiness and joy */ 287 return; 288 } 289 else { 290 fprintf(where, 291 "get_cpu_counters could not find one or more of the expected counters!\n"); 292 fflush(where); 293 exit(-1); 294 } 295 } 296 else { 297 /* the kstat_read returned an error or the chain changed */ 298 fprintf(where, 299 "get_cpu_counters: kstat_read failed or chain id changed %d %s\n", 300 errno, 301 strerror(errno)); 302 fflush(where); 303 exit(-1); 304 } 305 } 306 else { 307 /* the lookup failed or found the wrong type */ 308 fprintf(where, 309 "get_cpu_counters: kstat_lookup failed for module 'cpu' %d instance %d class 'intrstat' and KSTAT_TYPE_NAMED: errno %d %s\n", 310 cpu_num, 311 lib_cpu_map[cpu_num], 312 errno, 313 strerror(errno)); 314 fflush(where); 315 exit(-1); 316 } 317 318 } 319 320 static void 321 get_cpu_time_counters(cpu_time_counters_t *counters) 322 { 323 324 int i; 325 326 for (i = 0; i < lib_num_loc_cpus; i++){ 327 get_cpu_counters(i, counters); 328 get_interrupt_counters(i, counters); 329 } 330 331 return; 332 } 333 334 /* the kstat10 mechanism, since it is based on actual nanosecond 335 counters is not going to use a comparison to an idle rate. so, the 336 calibrate_idle_rate routine will be rather simple :) raj 2005-01-28 337 */ 338 339 float 340 calibrate_idle_rate(int iterations, int interval) 341 { 342 return 0.0; 343 } 344 345 float 346 calc_cpu_util_internal(float elapsed_time) 347 { 348 int i; 349 float correction_factor; 350 float actual_rate; 351 352 uint64_t total_cpu_nsec; 353 354 /* multiply by 100 and divide by total and you get whole 355 percentages. multiply by 1000 and divide by total and you get 356 tenths of percentages. multiply by 10000 and divide by total and 357 you get hundredths of percentages. etc etc etc raj 2005-01-28 */ 358 359 #define CALC_PERCENT 100 360 #define CALC_TENTH_PERCENT 1000 361 #define CALC_HUNDREDTH_PERCENT 10000 362 #define CALC_THOUSANDTH_PERCENT 100000 363 #define CALC_ACCURACY CALC_THOUSANDTH_PERCENT 364 365 uint64_t fraction_idle; 366 uint64_t fraction_user; 367 uint64_t fraction_kernel; 368 uint64_t fraction_interrupt; 369 370 uint64_t interrupt_idle; 371 uint64_t interrupt_user; 372 uint64_t interrupt_kernel; 373 374 memset(&lib_local_cpu_stats, 0, sizeof(lib_local_cpu_stats)); 375 376 /* It is possible that the library measured a time other than the 377 one that the user want for the cpu utilization calculations - for 378 example, tests that were ended by watchdog timers such as the udp 379 stream test. We let these tests tell up what the elapsed time 380 should be. */ 381 382 if (elapsed_time != 0.0) { 383 correction_factor = (float) 1.0 + 384 ((lib_elapsed - elapsed_time) / elapsed_time); 385 } 386 else { 387 correction_factor = (float) 1.0; 388 } 389 390 for (i = 0; i < lib_num_loc_cpus; i++) { 391 392 /* this is now the fun part. we have the nanoseconds _allegedly_ 393 spent in user, idle and kernel. We also have nanoseconds spent 394 servicing interrupts. Sadly, in the developer's finite wisdom, 395 the interrupt time accounting is in parallel with the other 396 accounting. this means that time accounted in user, kernel or 397 idle will also include time spent in interrupt. for netperf's 398 porpoises we do not really care about that for user and kernel, 399 but we certainly do care for idle. the $64B question becomes - 400 how to "correct" for this? 401 402 we could just subtract interrupt time from idle. that has the 403 virtue of simplicity and also "punishes" Sun for doing 404 something that seems to be so stupid. however, we probably 405 have to be "fair" even to the allegedly stupid so the other 406 mechanism, suggested by a Sun engineer is to subtract interrupt 407 time from each of user, kernel and idle in proportion to their 408 numbers. then we sum the corrected user, kernel and idle along 409 with the interrupt time and use that to calculate a new idle 410 percentage and thus a CPU util percentage. 411 412 that is what we will attempt to do here. raj 2005-01-28 413 414 of course, we also have to wonder what we should do if there is 415 more interrupt time than the sum of user, kernel and idle. 416 that is a theoretical possibility I suppose, but for the 417 time-being, one that we will blythly ignore, except perhaps for 418 a quick check. raj 2005-01-31 419 */ 420 421 /* we ass-u-me that these counters will never wrap during a 422 netperf run. this may not be a particularly safe thing to 423 do. raj 2005-01-28 */ 424 delta_cpu_counters[i].idle = ending_cpu_counters[i].idle - 425 starting_cpu_counters[i].idle; 426 delta_cpu_counters[i].user = ending_cpu_counters[i].user - 427 starting_cpu_counters[i].user; 428 delta_cpu_counters[i].kernel = ending_cpu_counters[i].kernel - 429 starting_cpu_counters[i].kernel; 430 delta_cpu_counters[i].interrupt = ending_cpu_counters[i].interrupt - 431 starting_cpu_counters[i].interrupt; 432 433 if (debug) { 434 print_cpu_time_counters("delta_cpu_counters",i,delta_cpu_counters); 435 } 436 437 /* for this summation, we do not include interrupt time */ 438 total_cpu_nsec = 439 delta_cpu_counters[i].idle + 440 delta_cpu_counters[i].user + 441 delta_cpu_counters[i].kernel; 442 443 if (debug) { 444 fprintf(where,"total_cpu_nsec %llu\n",total_cpu_nsec); 445 } 446 447 if (delta_cpu_counters[i].interrupt > total_cpu_nsec) { 448 /* we are not in Kansas any more Toto, and I am not quite sure 449 the best way to get our tails out of here so let us just 450 punt. raj 2005-01-31 */ 451 fprintf(where, 452 "WARNING! WARNING! WARNING! WARNING! WARNING! \n" 453 "calc_cpu_util_internal: more interrupt time than others combined!\n" 454 "\tso CPU util cannot be estimated\n" 455 "\t delta[%d].interrupt %llu\n" 456 "\t delta[%d].idle %llu\n" 457 "\t delta[%d].user %llu\n" 458 "\t delta[%d].kernel %llu\n", 459 i,delta_cpu_counters[i].interrupt, 460 i,delta_cpu_counters[i].idle, 461 i,delta_cpu_counters[i].user, 462 i,delta_cpu_counters[i].kernel); 463 fflush(where); 464 465 lib_local_cpu_stats.cpu_util = -1.0; 466 lib_local_per_cpu_util[i] = -1.0; 467 return -1.0; 468 } 469 470 /* and now some fun with integer math. i initially tried to 471 promote things to long doubled but that didn't seem to result 472 in happiness and joy. raj 2005-01-28 */ 473 474 fraction_idle = 475 (delta_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec; 476 477 fraction_user = 478 (delta_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec; 479 480 fraction_kernel = 481 (delta_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec; 482 483 /* ok, we have our fractions, now we want to take that fraction of 484 the interrupt time and subtract that from the bucket. */ 485 486 interrupt_idle = ((delta_cpu_counters[i].interrupt * fraction_idle) / 487 CALC_ACCURACY); 488 489 interrupt_user = ((delta_cpu_counters[i].interrupt * fraction_user) / 490 CALC_ACCURACY); 491 492 interrupt_kernel = ((delta_cpu_counters[i].interrupt * fraction_kernel) / 493 CALC_ACCURACY); 494 495 if (debug) { 496 fprintf(where, 497 "\tfraction_idle %llu interrupt_idle %llu\n" 498 "\tfraction_user %llu interrupt_user %llu\n" 499 "\tfraction_kernel %llu interrupt_kernel %llu\n", 500 fraction_idle, 501 interrupt_idle, 502 fraction_user, 503 interrupt_user, 504 fraction_kernel, 505 interrupt_kernel); 506 } 507 508 corrected_cpu_counters[i].idle = delta_cpu_counters[i].idle - 509 interrupt_idle; 510 511 corrected_cpu_counters[i].user = delta_cpu_counters[i].user - 512 interrupt_user; 513 514 corrected_cpu_counters[i].kernel = delta_cpu_counters[i].kernel - 515 interrupt_kernel; 516 517 corrected_cpu_counters[i].interrupt = delta_cpu_counters[i].interrupt; 518 519 if (debug) { 520 print_cpu_time_counters("corrected_cpu_counters", 521 i, 522 corrected_cpu_counters); 523 } 524 525 /* I was going to check for going less than zero, but since all 526 the calculations are in unsigned quantities that would seem to 527 be a triffle silly... raj 2005-01-28 */ 528 529 /* ok, now we sum the numbers again, this time including interrupt 530 */ 531 532 total_cpu_nsec = 533 corrected_cpu_counters[i].idle + 534 corrected_cpu_counters[i].user + 535 corrected_cpu_counters[i].kernel + 536 corrected_cpu_counters[i].interrupt; 537 538 /* and recalculate our fractions we are really only going to use 539 fraction_idle, but lets calculate the rest just for the heck of 540 it. one day we may want to display them. raj 2005-01-28 */ 541 542 /* multiply by 100 and divide by total and you get whole 543 percentages. multiply by 1000 and divide by total and you get 544 tenths of percentages. multiply by 10000 and divide by total 545 and you get hundredths of percentages. etc etc etc raj 546 2005-01-28 */ 547 fraction_idle = 548 (corrected_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec; 549 550 fraction_user = 551 (corrected_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec; 552 553 fraction_kernel = 554 (corrected_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec; 555 556 fraction_interrupt = 557 (corrected_cpu_counters[i].interrupt * CALC_ACCURACY) / total_cpu_nsec; 558 559 if (debug) { 560 fprintf(where,"\tfraction_idle %lu\n",fraction_idle); 561 fprintf(where,"\tfraction_user %lu\n",fraction_user); 562 fprintf(where,"\tfraction_kernel %lu\n",fraction_kernel); 563 fprintf(where,"\tfraction_interrupt %lu\n",fraction_interrupt); 564 } 565 566 /* and finally, what is our CPU utilization? */ 567 lib_local_per_cpu_util[i] = 100.0 - (((float)fraction_idle / 568 (float)CALC_ACCURACY) * 100.0); 569 lib_local_per_cpu_util[i] *= correction_factor; 570 if (debug) { 571 fprintf(where, 572 "lib_local_per_cpu_util[%d] %g cf %f\n", 573 i, 574 lib_local_per_cpu_util[i], 575 correction_factor); 576 } 577 lib_local_cpu_stats.cpu_util += lib_local_per_cpu_util[i]; 578 } 579 /* we want the average across all n processors */ 580 lib_local_cpu_stats.cpu_util /= (float)lib_num_loc_cpus; 581 582 return lib_local_cpu_stats.cpu_util; 583 } 584 585 void 586 cpu_start_internal(void) 587 { 588 get_cpu_time_counters(starting_cpu_counters); 589 return; 590 } 591 592 void 593 cpu_stop_internal(void) 594 { 595 get_cpu_time_counters(ending_cpu_counters); 596 } 597