1 char netcpu_looper_id[]="\ 2 @(#)netcpu_looper.c (c) Copyright 2005-2007. Version 2.4.3"; 3 4 /* netcpu_looper.c 5 6 Implement the soaker process specific portions of netperf CPU 7 utilization measurements. These are broken-out into a separate file 8 to make life much nicer over in netlib.c which had become a maze of 9 twisty, CPU-util-related, #ifdefs, all different. raj 2005-01-26 10 */ 11 12 #ifdef HAVE_CONFIG_H 13 #include <config.h> 14 #endif 15 16 #include <stdio.h> 17 18 #ifdef HAVE_FCNTL_H 19 # include <fcntl.h> 20 #endif 21 #if HAVE_UNISTD_H 22 # include <unistd.h> 23 #endif 24 #if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H) 25 # include <sys/mman.h> 26 #else 27 # error netcpu_looper requires mmap 28 #endif 29 30 #if TIME_WITH_SYS_TIME 31 # include <sys/time.h> 32 # include <time.h> 33 #else 34 # if HAVE_SYS_TIME_H 35 # include <sys/time.h> 36 # else 37 # include <time.h> 38 # endif 39 #endif 40 41 #if HAVE_SYS_TYPES_H 42 # include <sys/types.h> 43 #endif 44 45 #if HAVE_SYS_WAIT_H 46 # include <sys/wait.h> 47 #endif 48 49 #ifdef HAVE_SIGNAL_H 50 #include <signal.h> 51 #endif 52 53 #ifdef HAVE_ERRNO_H 54 #include <errno.h> 55 #endif 56 57 #include "netsh.h" 58 #include "netlib.h" 59 60 #define PAGES_PER_CHILD 2 61 62 /* the lib_start_count and lib_end_count arrays hold the starting 63 and ending values of whatever is counting when the system is 64 idle. The rate at which this increments during a test is compared 65 with a previous calibrarion to arrive at a CPU utilization 66 percentage. raj 2005-01-26 */ 67 static uint64_t lib_start_count[MAXCPUS]; 68 static uint64_t lib_end_count[MAXCPUS]; 69 70 static int *cpu_mappings; 71 72 static int lib_idle_fd; 73 static uint64_t *lib_idle_address[MAXCPUS]; 74 static long *lib_base_pointer; 75 static pid_t lib_idle_pids[MAXCPUS]; 76 static int lib_loopers_running=0; 77 78 /* we used to use this code to bind the loopers, but since we have 79 decided to enable processor affinity for the actual 80 netperf/netserver processes we will use that affinity routine, 81 which happens to know about more systems than this */ 82 83 #ifdef NOTDEF 84 static void 85 bind_to_processor(int child_num) 86 { 87 /* This routine will bind the calling process to a particular */ 88 /* processor. We are not choosy as to which processor, so it will be */ 89 /* the process id mod the number of processors - shifted by one for */ 90 /* those systems which name processor starting from one instead of */ 91 /* zero. on those systems where I do not yet know how to bind a */ 92 /* process to a processor, this routine will be a no-op raj 10/95 */ 93 94 /* just as a reminder, this is *only* for the looper processes, not */ 95 /* the actual measurement processes. those will, should, MUST float */ 96 /* or not float from CPU to CPU as controlled by the operating */ 97 /* system defaults. raj 12/95 */ 98 99 #ifdef __hpux 100 #include <sys/syscall.h> 101 #include <sys/mp.h> 102 103 int old_cpu = -2; 104 105 if (debug) { 106 fprintf(where, 107 "child %d asking for CPU %d as pid %d with %d CPUs\n", 108 child_num, 109 (child_num % lib_num_loc_cpus), 110 getpid(), 111 lib_num_loc_cpus); 112 fflush(where); 113 } 114 115 SETPROCESS((child_num % lib_num_loc_cpus), getpid()); 116 return; 117 118 #else 119 #if defined(__sun) && defined(__SVR4) 120 /* should only be Solaris */ 121 #include <sys/processor.h> 122 #include <sys/procset.h> 123 124 int old_binding; 125 126 if (debug) { 127 fprintf(where, 128 "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n", 129 child_num, 130 (child_num % lib_num_loc_cpus), 131 getpid(), 132 lib_num_loc_cpus); 133 fflush(where); 134 } 135 136 if (processor_bind(P_PID, 137 getpid(), 138 (child_num % lib_num_loc_cpus), 139 &old_binding) != 0) { 140 fprintf(where,"bind_to_processor: unable to perform processor binding\n"); 141 fprintf(where," errno %d\n",errno); 142 fflush(where); 143 } 144 return; 145 #else 146 #ifdef WIN32 147 148 if (!SetThreadAffinityMask(GetCurrentThread(), (ULONG_PTR)1 << (child_num % lib_num_loc_cpus))) { 149 perror("SetThreadAffinityMask failed"); 150 fflush(stderr); 151 } 152 153 if (debug) { 154 fprintf(where, 155 "bind_to_processor: child %d asking for CPU %d of %d CPUs\n", 156 child_num, 157 (child_num % lib_num_loc_cpus), 158 lib_num_loc_cpus); 159 fflush(where); 160 } 161 162 #endif 163 return; 164 #endif /* __sun && _SVR4 */ 165 #endif /* __hpux */ 166 } 167 #endif 168 169 /* sit_and_spin will just spin about incrementing a value */ 170 /* this value will either be in a memory mapped region on Unix shared */ 171 /* by each looper process, or something appropriate on Windows/NT */ 172 /* (malloc'd or such). This routine is reasonably ugly in that it has */ 173 /* priority manipulating code for lots of different operating */ 174 /* systems. This routine never returns. raj 1/96 */ 175 176 static void 177 sit_and_spin(int child_index) 178 179 { 180 uint64_t *my_counter_ptr; 181 182 /* only use C stuff if we are not WIN32 unless and until we */ 183 /* switch from CreateThread to _beginthread. raj 1/96 */ 184 #ifndef WIN32 185 /* we are the child. we could decide to exec some separate */ 186 /* program, but that doesn't really seem worthwhile - raj 4/95 */ 187 if (debug > 1) { 188 fprintf(where, 189 "Looper child %d is born, pid %d\n", 190 child_index, 191 getpid()); 192 fflush(where); 193 } 194 195 #endif /* WIN32 */ 196 197 /* reset our base pointer to be at the appropriate offset */ 198 my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer + 199 (netlib_get_page_size() * 200 PAGES_PER_CHILD * child_index)); 201 202 /* in the event we are running on an MP system, it would */ 203 /* probably be good to bind the soaker processes to specific */ 204 /* processors. I *think* this is the most reasonable thing to */ 205 /* do, and would be closes to simulating the information we get */ 206 /* on HP-UX with pstat. I could put all the system-specific code */ 207 /* here, but will "abstract it into another routine to keep this */ 208 /* area more readable. I'll probably do the same thine with the */ 209 /* "low pri code" raj 10/95 */ 210 211 /* since we are "flying blind" wrt where we should bind the looper 212 processes, we want to use the cpu_map that was prepared by netlib 213 rather than assume that the CPU ids on the system start at zero 214 and are contiguous. raj 2006-04-03 */ 215 bind_to_specific_processor(child_index % lib_num_loc_cpus,1); 216 217 for (*my_counter_ptr = 0L; 218 ; 219 (*my_counter_ptr)++) { 220 if (!(*lib_base_pointer % 1)) { 221 /* every once and again, make sure that our process priority is */ 222 /* nice and low. also, by making system calls, it may be easier */ 223 /* for us to be pre-empted by something that needs to do useful */ 224 /* work - like the thread of execution actually sending and */ 225 /* receiving data across the network :) */ 226 #ifdef _AIX 227 int pid,prio; 228 229 prio = PRIORITY; 230 pid = getpid(); 231 /* if you are not root, this call will return EPERM - why one */ 232 /* cannot change one's own priority to lower value is beyond */ 233 /* me. raj 2/26/96 */ 234 setpri(pid, prio); 235 #else /* _AIX */ 236 #ifdef __sgi 237 int pid,prio; 238 239 prio = PRIORITY; 240 pid = getpid(); 241 schedctl(NDPRI, pid, prio); 242 sginap(0); 243 #else /* __sgi */ 244 #ifdef WIN32 245 SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_IDLE); 246 #else /* WIN32 */ 247 #if defined(__sun) && defined(__SVR4) 248 #include <sys/types.h> 249 #include <sys/priocntl.h> 250 #include <sys/rtpriocntl.h> 251 #include <sys/tspriocntl.h> 252 /* I would *really* like to know how to use priocntl to make the */ 253 /* priority low for this looper process. however, either my mind */ 254 /* is addled, or the manpage in section two for priocntl is not */ 255 /* terribly helpful - for one, it has no examples :( so, if you */ 256 /* can help, I'd love to hear from you. in the meantime, we will */ 257 /* rely on nice(39). raj 2/26/96 */ 258 nice(39); 259 #else /* __sun && __SVR4 */ 260 nice(39); 261 #endif /* __sun && _SVR4 */ 262 #endif /* WIN32 */ 263 #endif /* __sgi */ 264 #endif /* _AIX */ 265 } 266 } 267 } 268 269 270 272 /* this routine will start all the looper processes or threads for */ 273 /* measuring CPU utilization. */ 274 275 static void 276 start_looper_processes() 277 { 278 279 unsigned int i, file_size; 280 281 /* we want at least two pages for each processor. the */ 282 /* child for any one processor will write to the first of his two */ 283 /* pages, and the second page will be a buffer in case there is page */ 284 /* prefetching. if your system pre-fetches more than a single page, */ 285 /* well, you'll have to modify this or live with it :( raj 4/95 */ 286 287 file_size = ((netlib_get_page_size() * PAGES_PER_CHILD) * 288 lib_num_loc_cpus); 289 290 #ifndef WIN32 291 292 /* we we are not using WINDOWS NT (or 95 actually :), then we want */ 293 /* to create a memory mapped region so we can see all the counting */ 294 /* rates of the loopers */ 295 296 /* could we just use an anonymous memory region for this? it is */ 297 /* possible that using a mmap()'ed "real" file, while convenient for */ 298 /* debugging, could result in some filesystem activity - like */ 299 /* metadata updates? raj 4/96 */ 300 lib_idle_fd = open("/tmp/netperf_cpu",O_RDWR | O_CREAT | O_EXCL); 301 302 if (lib_idle_fd == -1) { 303 fprintf(where,"create_looper: file creation; errno %d\n",errno); 304 fflush(where); 305 exit(1); 306 } 307 308 if (chmod("/tmp/netperf_cpu",0644) == -1) { 309 fprintf(where,"create_looper: chmod; errno %d\n",errno); 310 fflush(where); 311 exit(1); 312 } 313 314 /* with the file descriptor in place, lets be sure that the file is */ 315 /* large enough. */ 316 317 if (truncate("/tmp/netperf_cpu",file_size) == -1) { 318 fprintf(where,"create_looper: truncate: errno %d\n",errno); 319 fflush(where); 320 exit(1); 321 } 322 323 /* the file should be large enough now, so we can mmap it */ 324 325 /* if the system does not have MAP_VARIABLE, just define it to */ 326 /* be zero. it is only used/needed on HP-UX (?) raj 4/95 */ 327 #ifndef MAP_VARIABLE 328 #define MAP_VARIABLE 0x0000 329 #endif /* MAP_VARIABLE */ 330 #ifndef MAP_FILE 331 #define MAP_FILE 0x0000 332 #endif /* MAP_FILE */ 333 if ((lib_base_pointer = (long *)mmap(NULL, 334 file_size, 335 PROT_READ | PROT_WRITE, 336 MAP_FILE | MAP_SHARED | MAP_VARIABLE, 337 lib_idle_fd, 338 0)) == (long *)-1) { 339 fprintf(where,"create_looper: mmap: errno %d\n",errno); 340 fflush(where); 341 exit(1); 342 } 343 344 345 if (debug > 1) { 346 fprintf(where,"num CPUs %d, file_size %d, lib_base_pointer %p\n", 347 lib_num_loc_cpus, 348 file_size, 349 lib_base_pointer); 350 fflush(where); 351 } 352 353 /* we should have a valid base pointer. lets fork */ 354 355 for (i = 0; i < (unsigned int)lib_num_loc_cpus; i++) { 356 switch (lib_idle_pids[i] = fork()) { 357 case -1: 358 perror("netperf: fork"); 359 exit(1); 360 case 0: 361 /* we are the child. we could decide to exec some separate */ 362 /* program, but that doesn't really seem worthwhile - raj 4/95 */ 363 364 signal(SIGTERM, SIG_DFL); 365 sit_and_spin(i); 366 367 /* we should never really get here, but if we do, just exit(0) */ 368 exit(0); 369 break; 370 default: 371 /* we must be the parent */ 372 lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer + 373 (netlib_get_page_size() * 374 PAGES_PER_CHILD * i)); 375 if (debug) { 376 fprintf(where,"lib_idle_address[%d] is %p\n", 377 i, 378 lib_idle_address[i]); 379 fflush(where); 380 } 381 } 382 } 383 #else 384 /* we are compiled -DWIN32 */ 385 if ((lib_base_pointer = malloc(file_size)) == NULL) { 386 fprintf(where, 387 "create_looper_process could not malloc %d bytes\n", 388 file_size); 389 fflush(where); 390 exit(1); 391 } 392 393 /* now, create all the threads */ 394 for(i = 0; i < (unsigned int)lib_num_loc_cpus; i++) { 395 long place_holder; 396 if ((lib_idle_pids[i] = CreateThread(0, 397 0, 398 (LPTHREAD_START_ROUTINE)sit_and_spin, 399 (LPVOID)(ULONG_PTR)i, 400 0, 401 &place_holder)) == NULL ) { 402 fprintf(where, 403 "create_looper_process: CreateThread failed\n"); 404 fflush(where); 405 /* I wonder if I need to look for other threads to kill? */ 406 exit(1); 407 } 408 lib_idle_address[i] = (long *) ((char *)lib_base_pointer + 409 (netlib_get_page_size() * 410 PAGES_PER_CHILD * i)); 411 if (debug) { 412 fprintf(where,"lib_idle_address[%d] is %p\n", 413 i, 414 lib_idle_address[i]); 415 fflush(where); 416 } 417 } 418 #endif /* WIN32 */ 419 420 /* we need to have the looper processes settled-in before we do */ 421 /* anything with them, so lets sleep for say 30 seconds. raj 4/95 */ 422 423 sleep(30); 424 } 425 426 void 427 cpu_util_init(void) 428 { 429 cpu_method = LOOPER; 430 431 /* we want to get the looper processes going */ 432 if (!lib_loopers_running) { 433 start_looper_processes(); 434 lib_loopers_running = 1; 435 } 436 437 return; 438 } 439 440 /* clean-up any left-over CPU util resources - looper processes, 441 files, whatever. raj 2005-01-26 */ 442 void 443 cpu_util_terminate() { 444 445 #ifdef WIN32 446 /* it would seem that if/when the process exits, all the threads */ 447 /* will go away too, so I don't think I need any explicit thread */ 448 /* killing calls here. raj 1/96 */ 449 #else 450 451 int i; 452 453 /* now go through and kill-off all the child processes */ 454 for (i = 0; i < lib_num_loc_cpus; i++){ 455 /* SIGKILL can leave core files behind - thanks to Steinar Haug */ 456 /* for pointing that out. */ 457 kill(lib_idle_pids[i],SIGTERM); 458 } 459 lib_loopers_running = 0; 460 /* reap the children */ 461 while(waitpid(-1, NULL, WNOHANG) > 0) { } 462 463 /* finally, unlink the mmaped file */ 464 munmap((caddr_t)lib_base_pointer, 465 ((netlib_get_page_size() * PAGES_PER_CHILD) * 466 lib_num_loc_cpus)); 467 unlink("/tmp/netperf_cpu"); 468 #endif 469 return; 470 } 471 472 int 473 get_cpu_method(void) 474 { 475 return LOOPER; 476 } 477 478 /* calibrate_looper */ 479 480 /* Loop a number of iterations, sleeping interval seconds each and */ 481 /* count how high the idle counter gets each time. Return the */ 482 /* measured cpu rate to the calling routine. raj 4/95 */ 483 484 float 485 calibrate_idle_rate (int iterations, int interval) 486 { 487 488 uint64_t 489 firstcnt[MAXCPUS], 490 secondcnt[MAXCPUS]; 491 492 float 493 elapsed, 494 temp_rate, 495 rate[MAXTIMES], 496 local_maxrate; 497 498 long 499 sec, 500 usec; 501 502 int 503 i, 504 j; 505 506 struct timeval time1, time2 ; 507 struct timezone tz; 508 509 if (iterations > MAXTIMES) { 510 iterations = MAXTIMES; 511 } 512 513 local_maxrate = (float)-1.0; 514 515 for(i = 0; i < iterations; i++) { 516 rate[i] = (float)0.0; 517 for (j = 0; j < lib_num_loc_cpus; j++) { 518 firstcnt[j] = *(lib_idle_address[j]); 519 } 520 gettimeofday (&time1, &tz); 521 sleep(interval); 522 gettimeofday (&time2, &tz); 523 524 if (time2.tv_usec < time1.tv_usec) 525 { 526 time2.tv_usec += 1000000; 527 time2.tv_sec -=1; 528 } 529 sec = time2.tv_sec - time1.tv_sec; 530 usec = time2.tv_usec - time1.tv_usec; 531 elapsed = (float)sec + ((float)usec/(float)1000000.0); 532 533 if(debug) { 534 fprintf(where, "Calibration for counter run: %d\n",i); 535 fprintf(where,"\tsec = %ld usec = %ld\n",sec,usec); 536 fprintf(where,"\telapsed time = %g\n",elapsed); 537 } 538 539 for (j = 0; j < lib_num_loc_cpus; j++) { 540 secondcnt[j] = *(lib_idle_address[j]); 541 if(debug) { 542 /* I know that there are situations where compilers know about */ 543 /* long long, but the library fucntions do not... raj 4/95 */ 544 fprintf(where, 545 "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n", 546 j, 547 (uint32_t)(firstcnt[j]>>32), 548 (uint32_t)(firstcnt[j]&0xffffffff), 549 j, 550 (uint32_t)(secondcnt[j]>>32), 551 (uint32_t)(secondcnt[j]&0xffffffff)); 552 } 553 /* we assume that it would wrap no more than once. we also */ 554 /* assume that the result of subtracting will "fit" raj 4/95 */ 555 temp_rate = (secondcnt[j] >= firstcnt[j]) ? 556 (float)(secondcnt[j] - firstcnt[j])/elapsed : 557 (float)(secondcnt[j]-firstcnt[j]+MAXLONG)/elapsed; 558 if (temp_rate > rate[i]) rate[i] = temp_rate; 559 if(debug) { 560 fprintf(where,"\trate[%d] = %g\n",i,rate[i]); 561 fflush(where); 562 } 563 if (local_maxrate < rate[i]) local_maxrate = rate[i]; 564 } 565 } 566 if(debug) { 567 fprintf(where,"\tlocal maxrate = %g per sec. \n",local_maxrate); 568 fflush(where); 569 } 570 return local_maxrate; 571 } 572 573 574 void 575 get_cpu_idle (uint64_t *res) 576 { 577 int i; 578 579 for (i = 0; i < lib_num_loc_cpus; i++){ 580 res[i] = *lib_idle_address[i]; 581 } 582 583 } 584 585 float 586 calc_cpu_util_internal(float elapsed_time) 587 { 588 int i; 589 float correction_factor; 590 float actual_rate; 591 592 lib_local_cpu_util = (float)0.0; 593 /* It is possible that the library measured a time other than */ 594 /* the one that the user want for the cpu utilization */ 595 /* calculations - for example, tests that were ended by */ 596 /* watchdog timers such as the udp stream test. We let these */ 597 /* tests tell up what the elapsed time should be. */ 598 599 if (elapsed_time != 0.0) { 600 correction_factor = (float) 1.0 + 601 ((lib_elapsed - elapsed_time) / elapsed_time); 602 } 603 else { 604 correction_factor = (float) 1.0; 605 } 606 607 for (i = 0; i < lib_num_loc_cpus; i++) { 608 609 /* it would appear that on some systems, in loopback, nice is 610 *very* effective, causing the looper process to stop dead in its 611 tracks. if this happens, we need to ensure that the calculation 612 does not go south. raj 6/95 and if we run completely out of idle, 613 the same thing could in theory happen to the USE_KSTAT path. raj 614 8/2000 */ 615 616 if (lib_end_count[i] == lib_start_count[i]) { 617 lib_end_count[i]++; 618 } 619 620 actual_rate = (lib_end_count[i] > lib_start_count[i]) ? 621 (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed : 622 (float)(lib_end_count[i] - lib_start_count[i] + 623 MAXLONG)/ lib_elapsed; 624 if (debug) { 625 fprintf(where, 626 "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n", 627 i, 628 actual_rate, 629 (uint32_t)(lib_start_count[i]>>32), 630 (uint32_t)(lib_start_count[i]&0xffffffff), 631 (uint32_t)(lib_end_count[i]>>32), 632 (uint32_t)(lib_end_count[i]&0xffffffff)); 633 } 634 lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) / 635 lib_local_maxrate * 100; 636 lib_local_cpu_util += lib_local_per_cpu_util[i]; 637 } 638 /* we want the average across all n processors */ 639 lib_local_cpu_util /= (float)lib_num_loc_cpus; 640 641 lib_local_cpu_util *= correction_factor; 642 return lib_local_cpu_util; 643 644 645 } 646 void 647 cpu_start_internal(void) 648 { 649 get_cpu_idle(lib_start_count); 650 return; 651 } 652 653 void 654 cpu_stop_internal(void) 655 { 656 get_cpu_idle(lib_end_count); 657 } 658