#!/usr/bin/env bash
# this is a quick and dirty migration of runemomniagg2.sh to the
# --enable-demo mode of aggregate testing

# kill_netperfs: send SIGALRM to every process named netperf, then poll
# once a second until pgrep no longer reports a netperf process whose
# parent is PID 1.
# Arguments: none
function kill_netperfs {
  pkill -ALRM netperf

  while pgrep -P 1 -f netperf > /dev/null
  do
    sleep 1
  done
}

# run_cmd: start MAX_INSTANCES netperfs in parallel, pausing for
# DURATION seconds each time the number of running instances reaches a
# power of two (1, 2, 4, ...), then let the full set run for DURATION
# more seconds and stop them all with kill_netperfs.
#
# Globals read:
#   TEST, TESTLOG      - test name and the log file (truncated here)
#   MAX_INSTANCES      - number of netperf instances to start
#   REMOTE_HOSTS       - array of targets, used round-robin
#   NUM_REMOTE_HOSTS   - number of usable entries in REMOTE_HOSTS
#   NETPERF            - netperf command (word-split on purpose)
#   NETPERF_CMD        - everything after "netperf -H <target>"
#   DURATION           - seconds to dwell at each load level
# Outputs:
#   progress lines on stdout and in TESTLOG; one
#   netperf_<TEST>_<id>_to_<target>.out file per instance holding that
#   instance's stdout AND stderr.
function run_cmd {
  local now target id
  local i=0
  # the starting point for our load level pauses
  local pause_at=1

  now=$(date +%s.%N)
  echo "Starting netperfs at $now for $TEST" | tee "$TESTLOG"

  while (( i < MAX_INSTANCES ))
  do
    target=${REMOTE_HOSTS[i % NUM_REMOTE_HOSTS]}
    echo "Starting netperfs on localhost targeting ${target} for $TEST" | tee -a "$TESTLOG"
    printf -v id '%.5d' "$i"
    # The file redirection must come before 2>&1, otherwise stderr is
    # duplicated from the *old* stdout and never reaches the file.
    # NETPERF and NETPERF_CMD are deliberately left unquoted: they are
    # option strings that have to be split into words.
    # shellcheck disable=SC2086
    $NETPERF -H "$target" $NETPERF_CMD > "netperf_${TEST}_${id}_to_${target}.out" 2>&1 &

    # give it a moment to get going
    sleep 1

    i=$(( i + 1 ))

    # dwell at this load level, unless this was the last instance (the
    # final dwell happens after the loop)
    if (( i == pause_at && i != MAX_INSTANCES ))
    then
      now=$(date +%s.%N)
      echo "Pausing for $DURATION seconds at $now with $i netperfs running for $TEST" | tee -a "$TESTLOG"
      sleep "$DURATION"
      pause_at=$(( pause_at * 2 ))
      now=$(date +%s.%N)
      echo "Resuming at $now for $TEST" | tee -a "$TESTLOG"
    fi
  done

  now=$(date +%s.%N)
  echo "Netperfs started by $now for $TEST" | tee -a "$TESTLOG"

  # wait for our test duration
  sleep "$DURATION"

  # kludgey but this sleep should mean that another interim result will be emitted
  sleep 3

  # stop all the netperfs
  now=$(date +%s.%N)
  echo "Netperfs stopping $now for $TEST" | tee -a "$TESTLOG"
  kill_netperfs

  now=$(date +%s.%N)
  echo "Netperfs stopped $now for $TEST" | tee -a "$TESTLOG"
}

# very much like run_cmd, but it runs the tests one at a time rather
# than in parallel.
# We keep the same logging strings to be compatible
# (hopefully) with the post processing script, even though they don't
# make all that much sense :)
#
# run_cmd_serial: run one netperf per remote host, one at a time. Each
# instance is started in the background, left running for DURATION + 1
# seconds and then stopped with kill_netperfs before the next one is
# started.
#
# Globals read: TEST, TESTLOG, REMOTE_HOSTS, NUM_REMOTE_HOSTS, NETPERF,
#               NETPERF_CMD, DURATION
# Outputs:      progress lines on stdout and in TESTLOG (truncated
#               here); one netperf_<TEST>_<id>_to_<target>.out file per
#               host holding that instance's stdout AND stderr.
function run_cmd_serial {
  local now resumed_at target id
  local i=0

  now=$(date +%s.%N)
  echo "Starting netperfs at $now for $TEST" | tee "$TESTLOG"

  while (( i < NUM_REMOTE_HOSTS ))
  do
    target=${REMOTE_HOSTS[i]}
    echo "Starting netperfs on localhost targeting ${target} for $TEST" | tee -a "$TESTLOG"
    printf -v id '%.5d' "$i"
    # The file redirection must come before 2>&1, otherwise stderr is
    # duplicated from the *old* stdout and never reaches the file.
    # NETPERF and NETPERF_CMD are deliberately left unquoted: they are
    # option strings that have to be split into words.
    # shellcheck disable=SC2086
    $NETPERF -H "$target" $NETPERF_CMD > "netperf_${TEST}_${id}_to_${target}.out" 2>&1 &

    # give it a moment to get going
    sleep 1

    i=$(( i + 1 ))

    now=$(date +%s.%N)
    echo "Pausing for $DURATION seconds at $now with $i netperfs running for $TEST" | tee -a "$TESTLOG"
    # the plus one is to make sure we have a full set of interim
    # results. probably not necessary here but we want to be
    # certain
    sleep "$(( DURATION + 1 ))"
    kill_netperfs
    now=$(date +%s.%N)
    # Log a timestamp one second earlier than "now" (presumably to
    # offset the extra second slept above - confirm against the post
    # processing script). Only the integer part is decremented; the
    # fractional part is copied verbatim so that leading zeros in the
    # nanoseconds are not lost.
    resumed_at="$(( ${now%%.*} - 1 )).${now#*.}"
    echo "Resuming at $resumed_at for $TEST" | tee -a "$TESTLOG"
  done

  now=$(date +%s.%N)
  echo "Netperfs started by $now for $TEST" | tee -a "$TESTLOG"

  # stop all the netperfs - of course actually they have all been
  # stopped already, we just want the log entries
  now=$(date +%s.%N)
  echo "Netperfs stopping $now for $TEST" | tee -a "$TESTLOG"
  kill_netperfs
  now=$(date +%s.%N)
  echo "Netperfs stopped $now for $TEST" | tee -a "$TESTLOG"
}

# here then is the "main" part

if [ ! -f ./remote_hosts ]
then
  echo "This script requires a remote_hosts file" >&2
  # 255 is what bash produced for the former "exit -1"
  exit 255
fi
# remote_hosts is expected to define the REMOTE_HOSTS array and
# NUM_REMOTE_HOSTS
. ./remote_hosts

# validate before NUM_REMOTE_HOSTS is used in any arithmetic below
if [ "${NUM_REMOTE_HOSTS:-0}" -lt 2 ]
then
  echo "The list of remote hosts is too short.  There must be at least 2." >&2
  exit 255
fi

# how many processors are there on this system
NUM_CPUS=$(grep -c processor /proc/cpuinfo)

# the number of netperf instances we will run will be up to 2x the
# number of CPUs
MAX_INSTANCES=$(( ${NUM_CPUS:-0} * 2 ))

# but at least as many as there are entries in remote_hosts
if [ "$MAX_INSTANCES" -lt "$NUM_REMOTE_HOSTS" ]
then
  MAX_INSTANCES=$NUM_REMOTE_HOSTS
fi

# allow the netperf binary to be used to be overridden
NETPERF=${NETPERF:-netperf}

# we assume that netservers are already running on all the load generators

DURATION=120
# do not have a uuidgen? then use the one in netperf
MY_UUID=$(uuidgen)
# Without a UUID, leave -u out altogether: an empty value would make
# -u swallow the option that follows it on the command line.
UUID_OPT=""
if [ -n "$MY_UUID" ]
then
  UUID_OPT="-u $MY_UUID"
fi
# with top-of-trunk we could make this 0 and run forever
# but two hours is something of a failsafe if the signals
# get lost
LENGTH="-l 7200"
OUTPUT="-o all"

DO_STREAM=1
DO_MAERTS=1
# NOTE!  The Bidir test depends on being able to set a socket buffer
# size greater than 13 * 64KB or 832 KB or there is a risk of the test
# hanging.  If you are running linux, make certain that
# net.core.[r|w]mem_max are sufficiently large
DO_BIDIR=1
DO_RRAGG=1
DO_RR=1
DO_ANCILLARY=1

# UDP_RR for TPC/PPS using single-byte transactions. we do not use
# TCP_RR any longer because any packet losses or other matters
# affecting the congestion window will break our desire that there be
# a one to one correspondence between requests/responses and packets.
if [ "$DO_RRAGG" -eq 1 ]
then
  BURST=$(find_max_burst.sh "${REMOTE_HOSTS[0]}")
  # no output at all (e.g. the helper is missing) is treated like -1
  if [ -z "$BURST" ] || [ "$BURST" = "-1" ]
  then
    # use a value that find_max_burst will not have picked
    echo "find_max_burst.sh returned ${BURST:-nothing} so picking a burst of 9"
    BURST=9
  fi
  TEST="tps"
  TESTLOG="netperf_tps.log"
  NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -b $BURST -e 1 -T udp $UUID_OPT $OUTPUT"
  run_cmd
fi

# Bidirectional using burst-mode TCP_RR and large request/response size
if [ "$DO_BIDIR" -eq 1 ]
then
  TEST="bidirectional"
  TESTLOG="netperf_bidirectional.log"
  NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -r 64K -s 1M -S 1M -b 12 $UUID_OPT $OUTPUT"
  run_cmd
fi

# TCP_STREAM aka outbound with a 64K send size
# the netperf command is everything but netperf -H mumble
if [ "$DO_STREAM" -eq 1 ]
then
  TEST="outbound"
  TESTLOG="netperf_outbound.log"
  NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m 64K $UUID_OPT $OUTPUT"
  run_cmd
fi

# TCP_MAERTS aka inbound with a 64K send size - why is this one last?
# because presently when I pkill the netperf of a "MAERTS" test, the
# netserver does not behave well and it may not be possible to get it
# to behave well.  but we will still have all the interim results even
# if we don't get the final results, the useful parts of which will be
# the same as the other tests anyway
if [ "$DO_MAERTS" -eq 1 ]
then
  TEST="inbound"
  TESTLOG="netperf_inbound.log"
  NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m ,64K $UUID_OPT $OUTPUT"
  run_cmd
fi

# A single-stream of synchronous, no-burst TCP_RR in an "aggregate"
# script?  Yes, because the way the aggregate tests work, while there
# is a way to see what the performance of a single bulk transfer was,
# there is no way to see a basic latency - by the time
# find_max_burst.sh has completed, we are past a burst size of 0
if [ "$DO_RR" -eq 1 ]
then
  if [ "$DURATION" -lt 60 ]
  then
    DURATION=60
  fi
  TEST="sync_tps"
  TESTLOG="netperf_sync_tps.log"
  NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 $UUID_OPT $OUTPUT"
  run_cmd_serial
fi

# now some ancillary things which may or may not work on your platform
# (stdout is redirected first so that 2>&1 really lands in the file)
if [ "$DO_ANCILLARY" -eq 1 ]
then
  dmidecode > dmidecode.txt 2>&1
  uname -a > uname.txt 2>&1
  cat /proc/cpuinfo > cpuinfo.txt 2>&1
  cat /proc/meminfo > meminfo.txt 2>&1
  ifconfig -a > ifconfig.txt 2>&1
  netstat -rn > netstat.txt 2>&1
  dpkg -l > dpkg.txt 2>&1
  rpm -qa > rpm.txt 2>&1
  cat /proc/interrupts > interrupts.txt 2>&1
  # trace the route to every remote host, index 0 .. NUM_REMOTE_HOSTS-1
  for (( i = 0; i < NUM_REMOTE_HOSTS; i++ ))
  do
    traceroute "${REMOTE_HOSTS[i]}" > "traceroute_${REMOTE_HOSTS[i]}.txt" 2>&1
  done
fi