Home | History | Annotate | Download | only in netperf
      1 char   netcpu_looper_id[]="\
      2 @(#)netcpu_looper.c (c) Copyright 2005-2007. Version 2.4.3";
      3 
      4 /* netcpu_looper.c
      5 
      6    Implement the soaker process specific portions of netperf CPU
      7    utilization measurements. These are broken-out into a separate file
      8    to make life much nicer over in netlib.c which had become a maze of
      9    twisty, CPU-util-related, #ifdefs, all different.  raj 2005-01-26
     10    */
     11 
     12 #ifdef HAVE_CONFIG_H
     13 #include <config.h>
     14 #endif
     15 
     16 #include <stdio.h>
     17 
     18 #ifdef HAVE_FCNTL_H
     19 # include <fcntl.h>
     20 #endif
     21 #if HAVE_UNISTD_H
     22 # include <unistd.h>
     23 #endif
     24 #if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H)
     25 # include <sys/mman.h>
     26 #else
     27 # error netcpu_looper requires mmap
     28 #endif
     29 
     30 #if TIME_WITH_SYS_TIME
     31 # include <sys/time.h>
     32 # include <time.h>
     33 #else
     34 # if HAVE_SYS_TIME_H
     35 #  include <sys/time.h>
     36 # else
     37 #  include <time.h>
     38 # endif
     39 #endif
     40 
     41 #if HAVE_SYS_TYPES_H
     42 # include <sys/types.h>
     43 #endif
     44 
     45 #if HAVE_SYS_WAIT_H
     46 # include <sys/wait.h>
     47 #endif
     48 
     49 #ifdef HAVE_SIGNAL_H
     50 #include <signal.h>
     51 #endif
     52 
     53 #ifdef HAVE_ERRNO_H
     54 #include <errno.h>
     55 #endif
     56 
     57 #include "netsh.h"
     58 #include "netlib.h"
     59 
     60 #define PAGES_PER_CHILD 2
     61 
     62 /* the lib_start_count and lib_end_count arrays hold the starting
     63    and ending values of whatever is counting when the system is
     64    idle. The rate at which this increments during a test is compared
     65    with a previous calibrarion to arrive at a CPU utilization
     66    percentage. raj 2005-01-26 */
     67 static uint64_t  lib_start_count[MAXCPUS];
     68 static uint64_t  lib_end_count[MAXCPUS];
     69 
     70 static int *cpu_mappings;
     71 
     72 static int lib_idle_fd;
     73 static uint64_t *lib_idle_address[MAXCPUS];
     74 static long     *lib_base_pointer;
     75 static pid_t     lib_idle_pids[MAXCPUS];
     76 static int       lib_loopers_running=0;
     77 
     78 /* we used to use this code to bind the loopers, but since we have
     79    decided to enable processor affinity for the actual
     80    netperf/netserver processes we will use that affinity routine,
     81    which happens to know about more systems than this */
     82 
     83 #ifdef NOTDEF
     84 static void
     85 bind_to_processor(int child_num)
     86 {
     87   /* This routine will bind the calling process to a particular */
     88   /* processor. We are not choosy as to which processor, so it will be */
     89   /* the process id mod the number of processors - shifted by one for */
     90   /* those systems which name processor starting from one instead of */
     91   /* zero. on those systems where I do not yet know how to bind a */
     92   /* process to a processor, this routine will be a no-op raj 10/95 */
     93 
     94   /* just as a reminder, this is *only* for the looper processes, not */
     95   /* the actual measurement processes. those will, should, MUST float */
     96   /* or not float from CPU to CPU as controlled by the operating */
     97   /* system defaults. raj 12/95 */
     98 
     99 #ifdef __hpux
    100 #include <sys/syscall.h>
    101 #include <sys/mp.h>
    102 
    103   int old_cpu = -2;
    104 
    105   if (debug) {
    106     fprintf(where,
    107             "child %d asking for CPU %d as pid %d with %d CPUs\n",
    108             child_num,
    109             (child_num % lib_num_loc_cpus),
    110             getpid(),
    111             lib_num_loc_cpus);
    112     fflush(where);
    113   }
    114 
    115   SETPROCESS((child_num % lib_num_loc_cpus), getpid());
    116   return;
    117 
    118 #else
    119 #if defined(__sun) && defined(__SVR4)
    120  /* should only be Solaris */
    121 #include <sys/processor.h>
    122 #include <sys/procset.h>
    123 
    124   int old_binding;
    125 
    126   if (debug) {
    127     fprintf(where,
    128             "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n",
    129             child_num,
    130             (child_num % lib_num_loc_cpus),
    131             getpid(),
    132             lib_num_loc_cpus);
    133     fflush(where);
    134   }
    135 
    136   if (processor_bind(P_PID,
    137                      getpid(),
    138                      (child_num % lib_num_loc_cpus),
    139                       &old_binding) != 0) {
    140     fprintf(where,"bind_to_processor: unable to perform processor binding\n");
    141     fprintf(where,"                   errno %d\n",errno);
    142     fflush(where);
    143   }
    144   return;
    145 #else
    146 #ifdef WIN32
    147 
    148   if (!SetThreadAffinityMask(GetCurrentThread(), (ULONG_PTR)1 << (child_num % lib_num_loc_cpus))) {
    149     perror("SetThreadAffinityMask failed");
    150     fflush(stderr);
    151   }
    152 
    153   if (debug) {
    154     fprintf(where,
    155             "bind_to_processor: child %d asking for CPU %d of %d CPUs\n",
    156             child_num,
    157             (child_num % lib_num_loc_cpus),
    158             lib_num_loc_cpus);
    159     fflush(where);
    160   }
    161 
    162 #endif
    163   return;
    164 #endif /* __sun && _SVR4 */
    165 #endif /* __hpux */
    166 }
    167 #endif
    168 
    169  /* sit_and_spin will just spin about incrementing a value */
    170  /* this value will either be in a memory mapped region on Unix shared */
    171  /* by each looper process, or something appropriate on Windows/NT */
    172  /* (malloc'd or such). This routine is reasonably ugly in that it has */
    173  /* priority manipulating code for lots of different operating */
    174  /* systems. This routine never returns. raj 1/96 */
    175 
    176 static void
    177 sit_and_spin(int child_index)
    178 
    179 {
    180   uint64_t *my_counter_ptr;
    181 
    182  /* only use C stuff if we are not WIN32 unless and until we */
    183  /* switch from CreateThread to _beginthread. raj 1/96 */
    184 #ifndef WIN32
    185   /* we are the child. we could decide to exec some separate */
    186   /* program, but that doesn't really seem worthwhile - raj 4/95 */
    187   if (debug > 1) {
    188     fprintf(where,
    189             "Looper child %d is born, pid %d\n",
    190             child_index,
    191             getpid());
    192     fflush(where);
    193   }
    194 
    195 #endif /* WIN32 */
    196 
    197   /* reset our base pointer to be at the appropriate offset */
    198   my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer +
    199                              (netlib_get_page_size() *
    200                               PAGES_PER_CHILD * child_index));
    201 
    202   /* in the event we are running on an MP system, it would */
    203   /* probably be good to bind the soaker processes to specific */
    204   /* processors. I *think* this is the most reasonable thing to */
    205   /* do, and would be closes to simulating the information we get */
    206   /* on HP-UX with pstat. I could put all the system-specific code */
    207   /* here, but will "abstract it into another routine to keep this */
    208   /* area more readable. I'll probably do the same thine with the */
    209   /* "low pri code" raj 10/95 */
    210 
    211   /* since we are "flying blind" wrt where we should bind the looper
    212      processes, we want to use the cpu_map that was prepared by netlib
    213      rather than assume that the CPU ids on the system start at zero
    214      and are contiguous. raj 2006-04-03 */
    215   bind_to_specific_processor(child_index % lib_num_loc_cpus,1);
    216 
    217   for (*my_counter_ptr = 0L;
    218        ;
    219        (*my_counter_ptr)++) {
    220     if (!(*lib_base_pointer % 1)) {
    221       /* every once and again, make sure that our process priority is */
    222       /* nice and low. also, by making system calls, it may be easier */
    223       /* for us to be pre-empted by something that needs to do useful */
    224       /* work - like the thread of execution actually sending and */
    225       /* receiving data across the network :) */
    226 #ifdef _AIX
    227       int pid,prio;
    228 
    229       prio = PRIORITY;
    230       pid = getpid();
    231       /* if you are not root, this call will return EPERM - why one */
    232       /* cannot change one's own priority to  lower value is beyond */
    233       /* me. raj 2/26/96 */
    234       setpri(pid, prio);
    235 #else /* _AIX */
    236 #ifdef __sgi
    237       int pid,prio;
    238 
    239       prio = PRIORITY;
    240       pid = getpid();
    241       schedctl(NDPRI, pid, prio);
    242       sginap(0);
    243 #else /* __sgi */
    244 #ifdef WIN32
    245       SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_IDLE);
    246 #else /* WIN32 */
    247 #if defined(__sun) && defined(__SVR4)
    248 #include <sys/types.h>
    249 #include <sys/priocntl.h>
    250 #include <sys/rtpriocntl.h>
    251 #include <sys/tspriocntl.h>
    252       /* I would *really* like to know how to use priocntl to make the */
    253       /* priority low for this looper process. however, either my mind */
    254       /* is addled, or the manpage in section two for priocntl is not */
    255       /* terribly helpful - for one, it has no examples :( so, if you */
    256       /* can help, I'd love to hear from you. in the meantime, we will */
    257       /* rely on nice(39). raj 2/26/96 */
    258       nice(39);
    259 #else /* __sun && __SVR4 */
    260       nice(39);
    261 #endif /* __sun && _SVR4 */
    262 #endif /* WIN32 */
    263 #endif /* __sgi */
    264 #endif /* _AIX */
    265     }
    266   }
    267 }
    268 
    269 
    270 
    272  /* this routine will start all the looper processes or threads for */
    273  /* measuring CPU utilization. */
    274 
    275 static void
    276 start_looper_processes()
    277 {
    278 
    279   unsigned int      i, file_size;
    280 
    281   /* we want at least two pages for each processor. the */
    282   /* child for any one processor will write to the first of his two */
    283   /* pages, and the second page will be a buffer in case there is page */
    284   /* prefetching. if your system pre-fetches more than a single page, */
    285   /* well, you'll have to modify this or live with it :( raj 4/95 */
    286 
    287   file_size = ((netlib_get_page_size() * PAGES_PER_CHILD) *
    288                lib_num_loc_cpus);
    289 
    290 #ifndef WIN32
    291 
    292   /* we we are not using WINDOWS NT (or 95 actually :), then we want */
    293   /* to create a memory mapped region so we can see all the counting */
    294   /* rates of the loopers */
    295 
    296   /* could we just use an anonymous memory region for this? it is */
    297   /* possible that using a mmap()'ed "real" file, while convenient for */
    298   /* debugging, could result in some filesystem activity - like */
    299   /* metadata updates? raj 4/96 */
    300   lib_idle_fd = open("/tmp/netperf_cpu",O_RDWR | O_CREAT | O_EXCL);
    301 
    302   if (lib_idle_fd == -1) {
    303     fprintf(where,"create_looper: file creation; errno %d\n",errno);
    304     fflush(where);
    305     exit(1);
    306   }
    307 
    308   if (chmod("/tmp/netperf_cpu",0644) == -1) {
    309     fprintf(where,"create_looper: chmod; errno %d\n",errno);
    310     fflush(where);
    311     exit(1);
    312   }
    313 
    314   /* with the file descriptor in place, lets be sure that the file is */
    315   /* large enough. */
    316 
    317   if (truncate("/tmp/netperf_cpu",file_size) == -1) {
    318     fprintf(where,"create_looper: truncate: errno %d\n",errno);
    319     fflush(where);
    320     exit(1);
    321   }
    322 
    323   /* the file should be large enough now, so we can mmap it */
    324 
    325   /* if the system does not have MAP_VARIABLE, just define it to */
    326   /* be zero. it is only used/needed on HP-UX (?) raj 4/95 */
    327 #ifndef MAP_VARIABLE
    328 #define MAP_VARIABLE 0x0000
    329 #endif /* MAP_VARIABLE */
    330 #ifndef MAP_FILE
    331 #define MAP_FILE 0x0000
    332 #endif /* MAP_FILE */
    333   if ((lib_base_pointer = (long *)mmap(NULL,
    334                                        file_size,
    335                                        PROT_READ | PROT_WRITE,
    336                                        MAP_FILE | MAP_SHARED | MAP_VARIABLE,
    337                                        lib_idle_fd,
    338                                        0)) == (long *)-1) {
    339     fprintf(where,"create_looper: mmap: errno %d\n",errno);
    340     fflush(where);
    341     exit(1);
    342   }
    343 
    344 
    345   if (debug > 1) {
    346     fprintf(where,"num CPUs %d, file_size %d, lib_base_pointer %p\n",
    347             lib_num_loc_cpus,
    348             file_size,
    349             lib_base_pointer);
    350     fflush(where);
    351   }
    352 
    353   /* we should have a valid base pointer. lets fork */
    354 
    355   for (i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
    356     switch (lib_idle_pids[i] = fork()) {
    357     case -1:
    358       perror("netperf: fork");
    359       exit(1);
    360     case 0:
    361       /* we are the child. we could decide to exec some separate */
    362       /* program, but that doesn't really seem worthwhile - raj 4/95 */
    363 
    364       signal(SIGTERM, SIG_DFL);
    365       sit_and_spin(i);
    366 
    367       /* we should never really get here, but if we do, just exit(0) */
    368       exit(0);
    369       break;
    370     default:
    371       /* we must be the parent */
    372       lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer +
    373                                       (netlib_get_page_size() *
    374                                        PAGES_PER_CHILD * i));
    375       if (debug) {
    376         fprintf(where,"lib_idle_address[%d] is %p\n",
    377                 i,
    378                 lib_idle_address[i]);
    379         fflush(where);
    380       }
    381     }
    382   }
    383 #else
    384   /* we are compiled -DWIN32 */
    385   if ((lib_base_pointer = malloc(file_size)) == NULL) {
    386     fprintf(where,
    387             "create_looper_process could not malloc %d bytes\n",
    388             file_size);
    389     fflush(where);
    390     exit(1);
    391   }
    392 
    393   /* now, create all the threads */
    394   for(i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
    395     long place_holder;
    396     if ((lib_idle_pids[i] = CreateThread(0,
    397                                          0,
    398                                          (LPTHREAD_START_ROUTINE)sit_and_spin,
    399                                          (LPVOID)(ULONG_PTR)i,
    400                                          0,
    401                                          &place_holder)) == NULL ) {
    402       fprintf(where,
    403               "create_looper_process: CreateThread failed\n");
    404       fflush(where);
    405       /* I wonder if I need to look for other threads to kill? */
    406       exit(1);
    407     }
    408     lib_idle_address[i] = (long *) ((char *)lib_base_pointer +
    409                                     (netlib_get_page_size() *
    410                                      PAGES_PER_CHILD * i));
    411     if (debug) {
    412       fprintf(where,"lib_idle_address[%d] is %p\n",
    413               i,
    414               lib_idle_address[i]);
    415       fflush(where);
    416     }
    417   }
    418 #endif /* WIN32 */
    419 
    420   /* we need to have the looper processes settled-in before we do */
    421   /* anything with them, so lets sleep for say 30 seconds. raj 4/95 */
    422 
    423   sleep(30);
    424 }
    425 
    426 void
    427 cpu_util_init(void)
    428 {
    429   cpu_method = LOOPER;
    430 
    431   /* we want to get the looper processes going */
    432   if (!lib_loopers_running) {
    433     start_looper_processes();
    434     lib_loopers_running = 1;
    435   }
    436 
    437   return;
    438 }
    439 
    440 /* clean-up any left-over CPU util resources - looper processes,
    441    files, whatever.  raj 2005-01-26 */
    442 void
    443 cpu_util_terminate() {
    444 
    445 #ifdef WIN32
    446   /* it would seem that if/when the process exits, all the threads */
    447   /* will go away too, so I don't think I need any explicit thread */
    448   /* killing calls here. raj 1/96 */
    449 #else
    450 
    451   int i;
    452 
    453   /* now go through and kill-off all the child processes */
    454   for (i = 0; i < lib_num_loc_cpus; i++){
    455     /* SIGKILL can leave core files behind - thanks to Steinar Haug */
    456     /* for pointing that out. */
    457     kill(lib_idle_pids[i],SIGTERM);
    458   }
    459   lib_loopers_running = 0;
    460   /* reap the children */
    461   while(waitpid(-1, NULL, WNOHANG) > 0) { }
    462 
    463   /* finally, unlink the mmaped file */
    464   munmap((caddr_t)lib_base_pointer,
    465          ((netlib_get_page_size() * PAGES_PER_CHILD) *
    466           lib_num_loc_cpus));
    467   unlink("/tmp/netperf_cpu");
    468 #endif
    469   return;
    470 }
    471 
    472 int
    473 get_cpu_method(void)
    474 {
    475   return LOOPER;
    476 }
    477 
    478  /* calibrate_looper */
    479 
    480  /* Loop a number of iterations, sleeping interval seconds each and */
    481  /* count how high the idle counter gets each time. Return  the */
    482  /* measured cpu rate to the calling routine. raj 4/95 */
    483 
    484 float
    485 calibrate_idle_rate (int iterations, int interval)
    486 {
    487 
    488   uint64_t
    489     firstcnt[MAXCPUS],
    490     secondcnt[MAXCPUS];
    491 
    492   float
    493     elapsed,
    494     temp_rate,
    495     rate[MAXTIMES],
    496     local_maxrate;
    497 
    498   long
    499     sec,
    500     usec;
    501 
    502   int
    503     i,
    504     j;
    505 
    506   struct  timeval time1, time2 ;
    507   struct  timezone tz;
    508 
    509   if (iterations > MAXTIMES) {
    510     iterations = MAXTIMES;
    511   }
    512 
    513   local_maxrate = (float)-1.0;
    514 
    515   for(i = 0; i < iterations; i++) {
    516     rate[i] = (float)0.0;
    517     for (j = 0; j < lib_num_loc_cpus; j++) {
    518       firstcnt[j] = *(lib_idle_address[j]);
    519     }
    520     gettimeofday (&time1, &tz);
    521     sleep(interval);
    522     gettimeofday (&time2, &tz);
    523 
    524     if (time2.tv_usec < time1.tv_usec)
    525       {
    526         time2.tv_usec += 1000000;
    527         time2.tv_sec -=1;
    528       }
    529     sec = time2.tv_sec - time1.tv_sec;
    530     usec = time2.tv_usec - time1.tv_usec;
    531     elapsed = (float)sec + ((float)usec/(float)1000000.0);
    532 
    533     if(debug) {
    534       fprintf(where, "Calibration for counter run: %d\n",i);
    535       fprintf(where,"\tsec = %ld usec = %ld\n",sec,usec);
    536       fprintf(where,"\telapsed time = %g\n",elapsed);
    537     }
    538 
    539     for (j = 0; j < lib_num_loc_cpus; j++) {
    540       secondcnt[j] = *(lib_idle_address[j]);
    541       if(debug) {
    542         /* I know that there are situations where compilers know about */
    543         /* long long, but the library fucntions do not... raj 4/95 */
    544         fprintf(where,
    545                 "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n",
    546                 j,
    547                 (uint32_t)(firstcnt[j]>>32),
    548                 (uint32_t)(firstcnt[j]&0xffffffff),
    549                 j,
    550                 (uint32_t)(secondcnt[j]>>32),
    551                 (uint32_t)(secondcnt[j]&0xffffffff));
    552       }
    553       /* we assume that it would wrap no more than once. we also */
    554       /* assume that the result of subtracting will "fit" raj 4/95 */
    555       temp_rate = (secondcnt[j] >= firstcnt[j]) ?
    556         (float)(secondcnt[j] - firstcnt[j])/elapsed :
    557           (float)(secondcnt[j]-firstcnt[j]+MAXLONG)/elapsed;
    558       if (temp_rate > rate[i]) rate[i] = temp_rate;
    559       if(debug) {
    560         fprintf(where,"\trate[%d] = %g\n",i,rate[i]);
    561         fflush(where);
    562       }
    563       if (local_maxrate < rate[i]) local_maxrate = rate[i];
    564     }
    565   }
    566   if(debug) {
    567     fprintf(where,"\tlocal maxrate = %g per sec. \n",local_maxrate);
    568     fflush(where);
    569   }
    570   return local_maxrate;
    571 }
    572 
    573 
    574 void
    575 get_cpu_idle (uint64_t *res)
    576 {
    577   int i;
    578 
    579   for (i = 0; i < lib_num_loc_cpus; i++){
    580     res[i] = *lib_idle_address[i];
    581   }
    582 
    583 }
    584 
    585 float
    586 calc_cpu_util_internal(float elapsed_time)
    587 {
    588   int i;
    589   float correction_factor;
    590   float actual_rate;
    591 
    592   lib_local_cpu_util = (float)0.0;
    593   /* It is possible that the library measured a time other than */
    594   /* the one that the user want for the cpu utilization */
    595   /* calculations - for example, tests that were ended by */
    596   /* watchdog timers such as the udp stream test. We let these */
    597   /* tests tell up what the elapsed time should be. */
    598 
    599   if (elapsed_time != 0.0) {
    600     correction_factor = (float) 1.0 +
    601       ((lib_elapsed - elapsed_time) / elapsed_time);
    602   }
    603   else {
    604     correction_factor = (float) 1.0;
    605   }
    606 
    607   for (i = 0; i < lib_num_loc_cpus; i++) {
    608 
    609     /* it would appear that on some systems, in loopback, nice is
    610      *very* effective, causing the looper process to stop dead in its
    611      tracks. if this happens, we need to ensure that the calculation
    612      does not go south. raj 6/95 and if we run completely out of idle,
    613      the same thing could in theory happen to the USE_KSTAT path. raj
    614      8/2000 */
    615 
    616     if (lib_end_count[i] == lib_start_count[i]) {
    617       lib_end_count[i]++;
    618     }
    619 
    620     actual_rate = (lib_end_count[i] > lib_start_count[i]) ?
    621       (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed :
    622       (float)(lib_end_count[i] - lib_start_count[i] +
    623 	      MAXLONG)/ lib_elapsed;
    624     if (debug) {
    625       fprintf(where,
    626               "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n",
    627               i,
    628               actual_rate,
    629               (uint32_t)(lib_start_count[i]>>32),
    630               (uint32_t)(lib_start_count[i]&0xffffffff),
    631               (uint32_t)(lib_end_count[i]>>32),
    632               (uint32_t)(lib_end_count[i]&0xffffffff));
    633     }
    634     lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) /
    635       lib_local_maxrate * 100;
    636     lib_local_cpu_util += lib_local_per_cpu_util[i];
    637   }
    638   /* we want the average across all n processors */
    639   lib_local_cpu_util /= (float)lib_num_loc_cpus;
    640 
    641   lib_local_cpu_util *= correction_factor;
    642   return lib_local_cpu_util;
    643 
    644 
    645 }
    646 void
    647 cpu_start_internal(void)
    648 {
    649   get_cpu_idle(lib_start_count);
    650   return;
    651 }
    652 
    653 void
    654 cpu_stop_internal(void)
    655 {
    656   get_cpu_idle(lib_end_count);
    657 }
    658