1 char netcpu_kstat10_id[]="\ 2 @(#)netcpu_kstat10.c (c) Copyright 2005-2007, Hewlett-Packard Company Version 2.4.3"; 3 4 #if HAVE_CONFIG_H 5 # include <config.h> 6 #endif 7 8 #include <stdio.h> 9 10 #if HAVE_INTTYPES_H 11 # include <inttypes.h> 12 #else 13 # if HAVE_STDINT_H 14 # include <stdint.h> 15 # endif 16 #endif 17 18 #if HAVE_UNISTD_H 19 # include <unistd.h> 20 #endif 21 #if HAVE_STRINGS_H 22 # include <strings.h> 23 #endif 24 #if STDC_HEADERS 25 # include <stdlib.h> 26 # include <stddef.h> 27 #else 28 # if HAVE_STDLIB_H 29 # include <stdlib.h> 30 # endif 31 #endif 32 33 #include <errno.h> 34 35 #include <kstat.h> 36 #include <sys/sysinfo.h> 37 38 #include "netsh.h" 39 #include "netlib.h" 40 41 static kstat_ctl_t *kc = NULL; 42 static kid_t kcid = 0; 43 44 typedef struct cpu_time_counters { 45 uint64_t idle; 46 uint64_t user; 47 uint64_t kernel; 48 uint64_t interrupt; 49 } cpu_time_counters_t; 50 51 static cpu_time_counters_t starting_cpu_counters[MAXCPUS]; 52 static cpu_time_counters_t ending_cpu_counters[MAXCPUS]; 53 static cpu_time_counters_t delta_cpu_counters[MAXCPUS]; 54 static cpu_time_counters_t corrected_cpu_counters[MAXCPUS]; 55 56 static void 57 print_cpu_time_counters(char *name, int instance, cpu_time_counters_t *counters) 58 { 59 fprintf(where,"%s[%d]:\n",name,instance); 60 fprintf(where, 61 "\t idle %llu\n",counters[instance].idle); 62 fprintf(where, 63 "\t user %llu\n",counters[instance].user); 64 fprintf(where, 65 "\t kernel %llu\n",counters[instance].kernel); 66 fprintf(where, 67 "\t interrupt %llu\n",counters[instance].interrupt); 68 } 69 70 void 71 cpu_util_init(void) 72 { 73 kc = kstat_open(); 74 75 if (kc == NULL) { 76 fprintf(where, 77 "cpu_util_init: kstat_open: errno %d %s\n", 78 errno, 79 strerror(errno)); 80 fflush(where); 81 exit(-1); 82 } 83 return; 84 } 85 86 void 87 cpu_util_terminate(void) 88 { 89 kstat_close(kc); 90 return; 91 } 92 93 int 94 get_cpu_method(void) 95 { 96 return KSTAT_10; 97 } 98 99 static void 100 print_unexpected_statistic_warning(char *who, char *what, char *why) 101 { 102 if (why) { 103 fprintf(where, 104 "WARNING! WARNING! WARNING! WARNING!\n"); 105 fprintf(where, 106 "%s found an unexpected %s statistic %.16s\n", 107 who, 108 why, 109 what); 110 } 111 else { 112 fprintf(where, 113 "%s is ignoring statistic %.16s\n", 114 who, 115 what); 116 } 117 } 118 119 static void 120 get_cpu_counters(int cpu_num, cpu_time_counters_t *counters) 121 { 122 123 kstat_t *ksp; 124 int found=0; 125 kid_t nkcid; 126 kstat_named_t *knp; 127 int i; 128 129 ksp = kstat_lookup(kc, "cpu", cpu_num, "sys"); 130 if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) { 131 /* happiness and joy, keep going */ 132 nkcid = kstat_read(kc, ksp, NULL); 133 if (nkcid != -1) { 134 /* happiness and joy, keep going. we could consider adding a 135 "found < 3" to the end conditions, but then we wouldn't 136 search to the end and find that Sun added some nsec. we 137 probably want to see if they add an nsec. raj 2005-01-28 */ 138 for (i = ksp->ks_ndata, knp = ksp->ks_data; 139 i > 0; 140 knp++,i--) { 141 /* we would be hosed if the same name could appear twice */ 142 if (!strcmp("cpu_nsec_idle",knp->name)) { 143 found++; 144 counters[cpu_num].idle = knp->value.ui64; 145 } 146 else if (!strcmp("cpu_nsec_user",knp->name)) { 147 found++; 148 counters[cpu_num].user = knp->value.ui64; 149 } 150 else if (!strcmp("cpu_nsec_kernel",knp->name)) { 151 found++; 152 counters[cpu_num].kernel = knp->value.ui64; 153 } 154 else if (strstr(knp->name,"nsec")) { 155 /* finding another nsec here means Sun have changed 156 something and we need to warn the user. raj 2005-01-28 */ 157 print_unexpected_statistic_warning("get_cpu_counters", 158 knp->name, 159 "nsec"); 160 } 161 else if (debug >=2) { 162 163 /* might want to tell people about what we are skipping. 164 however, only display other names debug >=2. raj 165 2005-01-28 166 */ 167 168 print_unexpected_statistic_warning("get_cpu_counters", 169 knp->name, 170 NULL); 171 } 172 } 173 if (3 == found) { 174 /* happiness and joy */ 175 return; 176 } 177 else { 178 fprintf(where, 179 "get_cpu_counters could not find one or more of the expected counters!\n"); 180 fflush(where); 181 exit(-1); 182 } 183 } 184 else { 185 /* the kstat_read returned an error or the chain changed */ 186 fprintf(where, 187 "get_cpu_counters: kstat_read failed or chain id changed %d %s\n", 188 errno, 189 strerror(errno)); 190 fflush(where); 191 exit(-1); 192 } 193 } 194 else { 195 /* the lookup failed or found the wrong type */ 196 fprintf(where, 197 "get_cpu_counters: kstat_lookup failed for module 'cpu' instance %d name 'sys' and KSTAT_TYPE_NAMED: errno %d %s\n", 198 cpu_num, 199 errno, 200 strerror(errno)); 201 fflush(where); 202 exit(-1); 203 } 204 } 205 206 static void 207 get_interrupt_counters(int cpu_num, cpu_time_counters_t *counters) 208 { 209 kstat_t *ksp; 210 int found=0; 211 kid_t nkcid; 212 kstat_named_t *knp; 213 int i; 214 215 ksp = kstat_lookup(kc, "cpu", cpu_num, "intrstat"); 216 217 counters[cpu_num].interrupt = 0; 218 if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) { 219 /* happiness and joy, keep going */ 220 nkcid = kstat_read(kc, ksp, NULL); 221 if (nkcid != -1) { 222 /* happiness and joy, keep going. we could consider adding a 223 "found < 15" to the end conditions, but then we wouldn't 224 search to the end and find that Sun added some "time." we 225 probably want to see if they add a "nsec." raj 2005-01-28 */ 226 for (i = ksp->ks_ndata, knp = ksp->ks_data; 227 i > 0; 228 knp++,i--) { 229 if (strstr(knp->name,"time")) { 230 found++; 231 counters[cpu_num].interrupt += knp->value.ui64; 232 } 233 else if (debug >=2) { 234 235 /* might want to tell people about what we are skipping. 236 however, only display other names debug >=2. raj 237 2005-01-28 238 */ 239 240 print_unexpected_statistic_warning("get_cpu_counters", 241 knp->name, 242 NULL); 243 } 244 } 245 if (15 == found) { 246 /* happiness and joy */ 247 return; 248 } 249 else { 250 fprintf(where, 251 "get_cpu_counters could not find one or more of the expected counters!\n"); 252 fflush(where); 253 exit(-1); 254 } 255 } 256 else { 257 /* the kstat_read returned an error or the chain changed */ 258 fprintf(where, 259 "get_cpu_counters: kstat_read failed or chain id changed %d %s\n", 260 errno, 261 strerror(errno)); 262 fflush(where); 263 exit(-1); 264 } 265 } 266 else { 267 /* the lookup failed or found the wrong type */ 268 fprintf(where, 269 "get_cpu_counters: kstat_lookup failed for module 'cpu' instance %d class 'intrstat' and KSTAT_TYPE_NAMED: errno %d %s\n", 270 cpu_num, 271 errno, 272 strerror(errno)); 273 fflush(where); 274 exit(-1); 275 } 276 277 } 278 279 static void 280 get_cpu_time_counters(cpu_time_counters_t *counters) 281 { 282 283 int i; 284 285 for (i = 0; i < lib_num_loc_cpus; i++){ 286 get_cpu_counters(i, counters); 287 get_interrupt_counters(i, counters); 288 } 289 290 return; 291 } 292 293 /* the kstat10 mechanism, since it is based on actual nanosecond 294 counters is not going to use a comparison to an idle rate. so, the 295 calibrate_idle_rate routine will be rather simple :) raj 2005-01-28 296 */ 297 298 float 299 calibrate_idle_rate(int iterations, int interval) 300 { 301 return 0.0; 302 } 303 304 float 305 calc_cpu_util_internal(float elapsed_time) 306 { 307 int i; 308 float correction_factor; 309 float actual_rate; 310 311 uint64_t total_cpu_nsec; 312 313 /* multiply by 100 and divide by total and you get whole 314 percentages. multiply by 1000 and divide by total and you get 315 tenths of percentages. multiply by 10000 and divide by total and 316 you get hundredths of percentages. etc etc etc raj 2005-01-28 */ 317 318 #define CALC_PERCENT 100 319 #define CALC_TENTH_PERCENT 1000 320 #define CALC_HUNDREDTH_PERCENT 10000 321 #define CALC_THOUSANDTH_PERCENT 100000 322 #define CALC_ACCURACY CALC_THOUSANDTH_PERCENT 323 324 uint64_t fraction_idle; 325 uint64_t fraction_user; 326 uint64_t fraction_kernel; 327 uint64_t fraction_interrupt; 328 329 uint64_t interrupt_idle; 330 uint64_t interrupt_user; 331 uint64_t interrupt_kernel; 332 333 lib_local_cpu_util = (float)0.0; 334 335 /* It is possible that the library measured a time other than */ 336 /* the one that the user want for the cpu utilization */ 337 /* calculations - for example, tests that were ended by */ 338 /* watchdog timers such as the udp stream test. We let these */ 339 /* tests tell up what the elapsed time should be. */ 340 341 if (elapsed_time != 0.0) { 342 correction_factor = (float) 1.0 + 343 ((lib_elapsed - elapsed_time) / elapsed_time); 344 } 345 else { 346 correction_factor = (float) 1.0; 347 } 348 349 for (i = 0; i < lib_num_loc_cpus; i++) { 350 351 /* this is now the fun part. we have the nanoseconds _allegedly_ 352 spent in user, idle and kernel. We also have nanoseconds spent 353 servicing interrupts. Sadly, in the developer's finite wisdom, 354 the interrupt time accounting is in parallel with the other 355 accounting. this means that time accounted in user, kernel or 356 idle will also include time spent in interrupt. for netperf's 357 porpoises we do not really care about that for user and kernel, 358 but we certainly do care for idle. the $64B question becomes - 359 how to "correct" for this? 360 361 we could just subtract interrupt time from idle. that has the 362 virtue of simplicity and also "punishes" Sun for doing 363 something that seems to be so stupid. however, we probably 364 have to be "fair" even to the allegedly stupid so the other 365 mechanism, suggested by a Sun engineer is to subtract interrupt 366 time from each of user, kernel and idle in proportion to their 367 numbers. then we sum the corrected user, kernel and idle along 368 with the interrupt time and use that to calculate a new idle 369 percentage and thus a CPU util percentage. 370 371 that is what we will attempt to do here. raj 2005-01-28 372 373 of course, we also have to wonder what we should do if there is 374 more interrupt time than the sum of user, kernel and idle. 375 that is a theoretical possibility I suppose, but for the 376 time-being, one that we will blythly ignore, except perhaps for 377 a quick check. raj 2005-01-31 378 */ 379 380 /* we ass-u-me that these counters will never wrap during a 381 netperf run. this may not be a particularly safe thing to 382 do. raj 2005-01-28 */ 383 delta_cpu_counters[i].idle = ending_cpu_counters[i].idle - 384 starting_cpu_counters[i].idle; 385 delta_cpu_counters[i].user = ending_cpu_counters[i].user - 386 starting_cpu_counters[i].user; 387 delta_cpu_counters[i].kernel = ending_cpu_counters[i].kernel - 388 starting_cpu_counters[i].kernel; 389 delta_cpu_counters[i].interrupt = ending_cpu_counters[i].interrupt - 390 starting_cpu_counters[i].interrupt; 391 392 if (debug) { 393 print_cpu_time_counters("delta_cpu_counters",i,delta_cpu_counters); 394 } 395 396 /* for this summation, we do not include interrupt time */ 397 total_cpu_nsec = 398 delta_cpu_counters[i].idle + 399 delta_cpu_counters[i].user + 400 delta_cpu_counters[i].kernel; 401 402 if (debug) { 403 fprintf(where,"total_cpu_nsec %llu\n",total_cpu_nsec); 404 } 405 406 if (delta_cpu_counters[i].interrupt > total_cpu_nsec) { 407 /* we are not in Kansas any more Toto, and I am not quite sure 408 the best way to get our tails out of here so let us just 409 punt. raj 2005-01-31 */ 410 fprintf(where, 411 "WARNING! WARNING! WARNING! WARNING! WARNING! \n"); 412 fprintf(where, 413 "calc_cpu_util_internal: more interrupt time than others combined!\n"); 414 fprintf(where, 415 "\tso CPU util cannot be estimated\n"); 416 fprintf(where, 417 "\t delta[%d].interrupt %llu\n",i,delta_cpu_counters[i].interrupt); 418 fprintf(where, 419 "\t delta[%d].idle %llu\n",i,delta_cpu_counters[i].idle); 420 fprintf(where, 421 "\t delta[%d].user %llu\n",i,delta_cpu_counters[i].user); 422 fprintf(where, 423 "\t delta[%d].kernel %llu\n",i,delta_cpu_counters[i].kernel); 424 fflush(where); 425 426 lib_local_cpu_util = -1.0; 427 lib_local_per_cpu_util[i] = -1.0; 428 return -1.0; 429 } 430 431 /* and now some fun with integer math. i initially tried to 432 promote things to long doubled but that didn't seem to result 433 in happiness and joy. raj 2005-01-28 */ 434 435 fraction_idle = 436 (delta_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec; 437 438 fraction_user = 439 (delta_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec; 440 441 fraction_kernel = 442 (delta_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec; 443 444 /* ok, we have our fractions, now we want to take that fraction of 445 the interrupt time and subtract that from the bucket. */ 446 447 interrupt_idle = ((delta_cpu_counters[i].interrupt * fraction_idle) / 448 CALC_ACCURACY); 449 450 interrupt_user = ((delta_cpu_counters[i].interrupt * fraction_user) / 451 CALC_ACCURACY); 452 453 interrupt_kernel = ((delta_cpu_counters[i].interrupt * fraction_kernel) / 454 CALC_ACCURACY); 455 456 if (debug) { 457 fprintf(where, 458 "\tfraction_idle %llu interrupt_idle %llu\n", 459 fraction_idle, 460 interrupt_idle); 461 fprintf(where, 462 "\tfraction_user %llu interrupt_user %llu\n", 463 fraction_user, 464 interrupt_user); 465 fprintf(where,"\tfraction_kernel %llu interrupt_kernel %llu\n", 466 fraction_kernel, 467 interrupt_kernel); 468 } 469 470 corrected_cpu_counters[i].idle = delta_cpu_counters[i].idle - 471 interrupt_idle; 472 473 corrected_cpu_counters[i].user = delta_cpu_counters[i].user - 474 interrupt_user; 475 476 corrected_cpu_counters[i].kernel = delta_cpu_counters[i].kernel - 477 interrupt_kernel; 478 479 corrected_cpu_counters[i].interrupt = delta_cpu_counters[i].interrupt; 480 481 if (debug) { 482 print_cpu_time_counters("corrected_cpu_counters", 483 i, 484 corrected_cpu_counters); 485 } 486 487 /* I was going to checkfor going less than zero, but since all the 488 calculations are in unsigned quantities that would seem to be a 489 triffle silly... raj 2005-01-28 */ 490 491 /* ok, now we sum the numbers again, this time including interrupt 492 */ 493 494 total_cpu_nsec = 495 corrected_cpu_counters[i].idle + 496 corrected_cpu_counters[i].user + 497 corrected_cpu_counters[i].kernel + 498 corrected_cpu_counters[i].interrupt; 499 500 /* and recalculate our fractions we are really only going to use 501 fraction_idle, but lets calculate the rest just for the heck of 502 it. one day we may want to display them. raj 2005-01-28 */ 503 504 /* multiply by 100 and divide by total and you get whole 505 percentages. multiply by 1000 and divide by total and you get 506 tenths of percentages. multiply by 10000 and divide by total 507 and you get hundredths of percentages. etc etc etc raj 508 2005-01-28 */ 509 fraction_idle = 510 (corrected_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec; 511 512 fraction_user = 513 (corrected_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec; 514 515 fraction_kernel = 516 (corrected_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec; 517 518 fraction_interrupt = 519 (corrected_cpu_counters[i].interrupt * CALC_ACCURACY) / total_cpu_nsec; 520 521 if (debug) { 522 fprintf(where,"\tfraction_idle %lu\n",fraction_idle); 523 fprintf(where,"\tfraction_user %lu\n",fraction_user); 524 fprintf(where,"\tfraction_kernel %lu\n",fraction_kernel); 525 fprintf(where,"\tfraction_interrupt %lu\n",fraction_interrupt); 526 } 527 528 /* and finally, what is our CPU utilization? */ 529 lib_local_per_cpu_util[i] = 100.0 - (((float)fraction_idle / 530 (float)CALC_ACCURACY) * 100.0); 531 if (debug) { 532 fprintf(where, 533 "lib_local_per_cpu_util[%d] %g\n", 534 i, 535 lib_local_per_cpu_util[i]); 536 } 537 lib_local_cpu_util += lib_local_per_cpu_util[i]; 538 } 539 /* we want the average across all n processors */ 540 lib_local_cpu_util /= (float)lib_num_loc_cpus; 541 542 lib_local_cpu_util *= correction_factor; 543 return lib_local_cpu_util; 544 545 546 } 547 548 void 549 cpu_start_internal(void) 550 { 551 get_cpu_time_counters(starting_cpu_counters); 552 return; 553 } 554 555 void 556 cpu_stop_internal(void) 557 { 558 get_cpu_time_counters(ending_cpu_counters); 559 } 560