Home | History | Annotate | Download | only in examples
      1 # this is a quick and dirty migration of runemomniagg2.sh to the 
      2 # --enable-demo mode of aggregate testing
      3 function kill_netperfs {
      4     pkill -ALRM netperf
      5 
      6     pgrep -P 1 -f netperf > /dev/null
      7     while [ $? -eq 0 ]
      8     do
      9 	sleep 1
     10 	pgrep -P 1 -f netperf > /dev/null
     11     done
     12 }
     13 
     14 function run_cmd {
     15 
     16     NOW=`date +%s.%N`
     17     echo "Starting netperfs at $NOW for $TEST" | tee $TESTLOG
     18     i=0;
     19 
     20 # the starting point for our load level pauses
     21     PAUSE_AT=1
     22 
     23 
     24     while [ $i -lt $MAX_INSTANCES ]
     25     do
     26 	TARGET=${REMOTE_HOSTS[`expr $i % $NUM_REMOTE_HOSTS`]}
     27 	echo "Starting netperfs on localhost targeting ${TARGET} for $TEST" | tee -a $TESTLOG
     28 	id=`printf "%.5d" $i`
     29 	$NETPERF -H $TARGET $NETPERF_CMD 2>&1 > netperf_${TEST}_${id}_to_${TARGET}.out &
     30 
     31     # give it a moment to get going
     32 	sleep 1
     33 
     34 	i=`expr $i + 1`
     35 
     36 	if [ $i  -eq $PAUSE_AT ] && [ $i -ne $MAX_INSTANCES ]
     37 	then
     38 	    NOW=`date +%s.%N`
     39 	    echo "Pausing for $DURATION seconds at $NOW with $i netperfs running for $TEST" | tee -a $TESTLOG
     40 	    sleep $DURATION
     41 	    PAUSE_AT=`expr $PAUSE_AT \* 2`
     42 	    NOW=`date +%s.%N`
     43 	    echo "Resuming at $NOW for $TEST" | tee -a $TESTLOG
     44 	fi    
     45     done
     46 
     47     NOW=`date +%s.%N`
     48     echo "Netperfs started by $NOW for $TEST" | tee -a $TESTLOG
     49 
     50 #wait for our test duration
     51     sleep $DURATION
     52 
     53 #kludgey but this sleep should mean that another interim result will be emitted
     54     sleep 3
     55 
     56 # stop all the netperfs
     57     NOW=`date +%s.%N`
     58     echo "Netperfs stopping $NOW for $TEST" | tee -a $TESTLOG
     59     kill_netperfs
     60     
     61     NOW=`date +%s.%N`
     62     echo "Netperfs stopped $NOW for $TEST" | tee -a $TESTLOG
     63 
     64 }
     65 
     66 # very much like run_cmd, but it runs the tests one at a time rather
     67 # than in parallel.  We keep the same logging strings to be compatible
     68 # (hopefully) with the post processing script, even though they don't
     69 # make all that much sense :)
     70 
     71 function run_cmd_serial {
     72 
     73     NOW=`date +%s.%N`
     74     echo "Starting netperfs at $NOW for $TEST" | tee $TESTLOG
     75     i=0;
     76 
     77 # the starting point for our load level pauses
     78     PAUSE_AT=1
     79 
     80 
     81     while [ $i -lt $NUM_REMOTE_HOSTS ]
     82     do
     83 	TARGET=${REMOTE_HOSTS[`expr $i % $NUM_REMOTE_HOSTS`]}
     84 	echo "Starting netperfs on localhost targeting ${TARGET} for $TEST" | tee -a $TESTLOG
     85 	id=`printf "%.5d" $i`
     86 	$NETPERF -H $TARGET $NETPERF_CMD 2>&1 > netperf_${TEST}_${id}_to_${TARGET}.out &
     87 
     88     # give it a moment to get going
     89 	sleep 1
     90 
     91 	i=`expr $i + 1`
     92 
     93 	NOW=`date +%s.%N`
     94 	echo "Pausing for $DURATION seconds at $NOW with $i netperfs running for $TEST" | tee -a $TESTLOG
     95 	# the plus two is to make sure we have a full set of interim
     96 	# results.  probably not necessary here but we want to be
     97 	# certain
     98 	sleep `expr $DURATION + 1`
     99 	kill_netperfs
    100 	NOW=`date +%s.%N`
    101 	THEN=`echo $NOW | awk -F "." '{printf("%d.%d",$1-1,$2)}'`
    102 	echo "Resuming at $THEN for $TEST" | tee -a $TESTLOG
    103 
    104     done
    105 
    106     NOW=`date +%s.%N`
    107     echo "Netperfs started by $NOW for $TEST" | tee -a $TESTLOG
    108 
    109 # stop all the netperfs - of course actually they have all been
    110 # stopped already, we just want the log entries
    111     NOW=`date +%s.%N`
    112     echo "Netperfs stopping $NOW for $TEST" | tee -a $TESTLOG
    113     kill_netperfs
    114     NOW=`date +%s.%N`
    115     echo "Netperfs stopped $NOW for $TEST" | tee -a $TESTLOG
    116 }
    117 
    118 # here then is the "main" part
    119 
    120 if [ ! -f ./remote_hosts ]
    121 then
    122     echo "This script requires a remote_hosts file"
    123     exit -1
    124 fi
    125 . ./remote_hosts
    126 
    127 # how many processors are there on this system
    128 NUM_CPUS=`grep processor /proc/cpuinfo | wc -l`
    129 
    130 # the number of netperf instances we will run will be up to 2x the
    131 # number of CPUs
    132 MAX_INSTANCES=`expr $NUM_CPUS \* 2`
    133 
    134 # but at least as many as there are entries in remote_hosts
    135 if [ $MAX_INSTANCES -lt $NUM_REMOTE_HOSTS ]
    136 then
    137     MAX_INSTANCES=$NUM_REMOTE_HOSTS
    138 fi
    139 
    140 # allow the netperf binary to be used to be overridden 
    141 NETPERF=${NETPERF:="netperf"}
    142 
    143 if [ $NUM_REMOTE_HOSTS -lt 2 ]
    144 then
    145     echo "The list of remote hosts is too short.  There must be at least 2."
    146     exit -1
    147 fi
    148 
    149 # we assume that netservers are already running on all the load generators
    150 
    151 DURATION=120
    152 # do not have a uuidgen? then use the one in netperf
    153 MY_UUID=`uuidgen`
    154 # with top-of-trunk we could make this 0 and run forever
    155 # but two hours is something of a failsafe if the signals
    156 # get lost
    157 LENGTH="-l 7200"
    158 OUTPUT="-o all"
    159 
    160 DO_STREAM=1;
    161 DO_MAERTS=1;
    162 # NOTE!  The Bidir test depends on being able to set a socket buffer
    163 # size greater than 13 * 64KB or 832 KB or there is a risk of the test
    164 # hanging.  If you are running linux, make certain that
    165 # net.core.[r|w]mem_max are sufficiently large
    166 DO_BIDIR=1;
    167 DO_RRAGG=1;
    168 DO_RR=1;
    169 DO_ANCILLARY=1;
    170 
    171 # UDP_RR for TPC/PPS using single-byte transactions. we do not use
    172 # TCP_RR any longer because any packet losses or other matters
    173 # affecting the congestion window will break our desire that there be
    174 # a one to one correspondence between requests/responses and packets.
    175 if [ $DO_RRAGG -eq 1 ]; then
    176     BURST=`find_max_burst.sh ${REMOTE_HOSTS[0]}`
    177     if [ $BURST -eq -1 ]; then
    178         # use a value that find_max_burst will not have picked
    179         BURST=9
    180         echo "find_max_burst.sh returned -1 so picking a burst of $BURST"
    181     fi
    182     TEST="tps"
    183     TESTLOG="netperf_tps.log"
    184     NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -b $BURST -e 1 -T udp -u $MY_UUID $OUTPUT"
    185     run_cmd
    186 fi
    187 
    188 # Bidirectional using burst-mode TCP_RR and large request/response size
    189 if [ $DO_BIDIR -eq 1 ]; then
    190     TEST="bidirectional"
    191     TESTLOG="netperf_bidirectional.log"
    192     NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -r 64K -s 1M -S 1M -b 12 -u $MY_UUID $OUTPUT"
    193     run_cmd
    194 fi
    195 
    196 # TCP_STREAM aka outbound with a 64K send size
    197 # the netperf command is everything but netperf -H mumble
    198 if [ $DO_STREAM -eq 1 ];then
    199     TEST="outbound"
    200     TESTLOG="netperf_outbound.log"
    201     NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m 64K -u $MY_UUID $OUTPUT"
    202     run_cmd
    203 fi
    204 
    205 # TCP_MAERTS aka inbound with a 64K send size - why is this one last?
    206 # because presently when I pkill the netperf of a "MAERTS" test, the
    207 # netserver does not behave well and it may not be possible to get it
    208 # to behave well.  but we will still have all the interim results even
    209 # if we don't get the final results, the useful parts of which will be
    210 # the same as the other tests anyway
    211 if [ $DO_MAERTS -eq 1 ]; then
    212     TEST="inbound"
    213     TESTLOG="netperf_inbound.log"
    214     NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m ,64K -u $MY_UUID $OUTPUT"
    215     run_cmd
    216 fi
    217 
    218 # A single-stream of synchronous, no-burst TCP_RR in an "aggregate"
    219 # script?  Yes, because the way the aggregate tests work, while there
    220 # is a way to see what the performance of a single bulk transfer was,
    221 # there is no way to see a basic latency - by the time
    222 # find_max_burst.sh has completed, we are past a burst size of 0
    223 if [ $DO_RR -eq 1 ]; then
    224     if [ $DURATION -lt 60 ]; then
    225 	DURATION=60
    226     fi
    227     TEST="sync_tps"
    228     TESTLOG="netperf_sync_tps.log"
    229     NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -u $MY_UUID $OUTPUT"
    230     run_cmd_serial
    231 fi
    232 
    233 
    234 # now some ancillary things which may nor may not work on your platform
    235 if [ $DO_ANCILLARY -eq 1 ];then
    236     dmidecode 2>&1 > dmidecode.txt
    237     uname -a 2>&1 > uname.txt
    238     cat /proc/cpuinfo 2>&1 > cpuinfo.txt
    239     cat /proc/meminfo 2>&1 > meminfo.txt
    240     ifconfig -a 2>&1 > ifconfig.txt
    241     netstat -rn 2>&1 > netstat.txt
    242     dpkg -l 2>&1 > dpkg.txt
    243     rpm -qa 2>&1 > rpm.txt
    244     cat /proc/interrupts 2>&1 > interrupts.txt
    245     i=0
    246     while [ $i -lt `expr $NUM_REMOTE_HOSTS - 1` ]
    247     do
    248 	traceroute ${REMOTE_HOSTS[$i]} > traceroute_${REMOTE_HOSTS[$i]}.txt
    249 	i=`expr $i + 1`
    250     done
    251 fi
    252