1 2 /* 3 * This is the latest version of hackbench.c, that tests scheduler and 4 * unix-socket (or pipe) performance. 5 * 6 * Usage: hackbench [-pipe] <num groups> [process|thread] [loops] 7 * 8 * Build it with: 9 * gcc -g -Wall -O2 -o hackbench hackbench.c -lpthread 10 */ 11 #if 0 12 13 Date: Fri, 04 Jan 2008 14:06:26 +0800 14 From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com> 15 To: LKML <linux-kernel@vger.kernel.org> 16 Subject: Improve hackbench 17 Cc: Ingo Molnar <mingo@elte.hu>, Arjan van de Ven <arjan@infradead.org> 18 19 hackbench tests the Linux scheduler. The original program is at 20 http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c 21 Based on this multi-process version, a nice person created a multi-thread 22 version. Pls. see 23 http://www.bullopensource.org/posix/pi-futex/hackbench_pth.c 24 25 When I integrated them into my automation testing system, I found 26 a couple of issues and did some improvements. 27 28 1) Merge hackbench: I integrated hackbench_pth.c into hackbench and added a 29 new parameter which can be used to choose process mode or thread mode. The 30 default mode is process. 31 32 2) It runs too fast and ends in a couple of seconds. Sometimes it's too hard to debug 33 the issues. On my ia64 Montecito machines, the result looks weird when comparing 34 process mode and thread mode. 35 I want a stable result and hope the testing could run for a stable longer time, so I 36 might use performance tools to debug issues. 37 I added another new parameter,`loops`, which can be used to change variable loops, 38 so more messages will be passed from writers to receivers. Parameter 'loops' is equal to 39 100 by default. 40 41 For example on my 8-core x86_64: 42 [ymzhang@lkp-st01-x8664 hackbench]$ uname -a 43 Linux lkp-st01-x8664 2.6.24-rc6 #1 SMP Fri Dec 21 08:32:31 CST 2007 x86_64 x86_64 x86_64 GNU/Linux 44 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 45 Usage: hackbench [-pipe] <num groups> [process|thread] [loops] 46 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 process 1000 47 Time: 151.533 48 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 thread 1000 49 Time: 153.666 50 51 52 With the same new parameters, I did captured the SLUB issue discussed on LKML recently. 53 54 3) hackbench_pth.c will fail on ia64 machine because pthread_attr_setstacksize always 55 fails if the stack size is less than 196*1024. I moved this statement within a __ia64__ check. 56 57 58 This new program could be compiled with command line: 59 #gcc -g -Wall -o hackbench hackbench.c -lpthread 60 61 62 Thank Ingo for his great comments! 63 64 -yanmin 65 66 --- 67 68 * Nathan Lynch <ntl (at) pobox.com> wrote: 69 70 > Here's a fixlet for the hackbench program found at 71 > 72 > http://people.redhat.com/mingo/cfs-scheduler/tools/hackbench.c 73 > 74 > When redirecting hackbench output I am seeing multiple copies of the 75 > "Running with %d*40 (== %d) tasks" line. Need to flush the buffered 76 > output before forking. 77 78 #endif 79 80 /* Test groups of 20 processes spraying to 20 receivers */ 81 #include <pthread.h> 82 #include <stdio.h> 83 #include <stdlib.h> 84 #include <string.h> 85 #include <errno.h> 86 #include <unistd.h> 87 #include <sys/types.h> 88 #include <sys/socket.h> 89 #include <sys/wait.h> 90 #include <sys/time.h> 91 #include <sys/poll.h> 92 #include <limits.h> 93 94 #define DATASIZE 100 95 static unsigned int loops = 100; 96 /* 97 * 0 means thread mode and others mean process (default) 98 */ 99 static unsigned int process_mode = 1; 100 101 static int use_pipes = 0; 102 103 struct sender_context { 104 unsigned int num_fds; 105 int ready_out; 106 int wakefd; 107 int out_fds[0]; 108 }; 109 110 struct receiver_context { 111 unsigned int num_packets; 112 int in_fds[2]; 113 int ready_out; 114 int wakefd; 115 }; 116 117 118 static void barf(const char *msg) 119 { 120 fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno)); 121 exit(1); 122 } 123 124 static void print_usage_exit() 125 { 126 printf("Usage: hackbench [-pipe] <num groups> [process|thread] [loops]\n"); 127 exit(1); 128 } 129 130 static void fdpair(int fds[2]) 131 { 132 if (use_pipes) { 133 if (pipe(fds) == 0) 134 return; 135 } else { 136 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) 137 return; 138 } 139 barf("Creating fdpair"); 140 } 141 142 /* Block until we're ready to go */ 143 static void ready(int ready_out, int wakefd) 144 { 145 char dummy; 146 struct pollfd pollfd = { .fd = wakefd, .events = POLLIN }; 147 148 /* Tell them we're ready. */ 149 if (write(ready_out, &dummy, 1) != 1) 150 barf("CLIENT: ready write"); 151 152 /* Wait for "GO" signal */ 153 if (poll(&pollfd, 1, -1) != 1) 154 barf("poll"); 155 } 156 157 /* Sender sprays loops messages down each file descriptor */ 158 static void *sender(struct sender_context *ctx) 159 { 160 char data[DATASIZE]; 161 unsigned int i, j; 162 163 ready(ctx->ready_out, ctx->wakefd); 164 165 /* Now pump to every receiver. */ 166 for (i = 0; i < loops; i++) { 167 for (j = 0; j < ctx->num_fds; j++) { 168 int ret, done = 0; 169 170 again: 171 ret = write(ctx->out_fds[j], data + done, sizeof(data)-done); 172 if (ret < 0) 173 barf("SENDER: write"); 174 done += ret; 175 if (done < sizeof(data)) 176 goto again; 177 } 178 } 179 180 return NULL; 181 } 182 183 184 /* One receiver per fd */ 185 static void *receiver(struct receiver_context* ctx) 186 { 187 unsigned int i; 188 189 if (process_mode) 190 close(ctx->in_fds[1]); 191 192 /* Wait for start... */ 193 ready(ctx->ready_out, ctx->wakefd); 194 195 /* Receive them all */ 196 for (i = 0; i < ctx->num_packets; i++) { 197 char data[DATASIZE]; 198 int ret, done = 0; 199 200 again: 201 ret = read(ctx->in_fds[0], data + done, DATASIZE - done); 202 if (ret < 0) 203 barf("SERVER: read"); 204 done += ret; 205 if (done < DATASIZE) 206 goto again; 207 } 208 209 return NULL; 210 } 211 212 pthread_t create_worker(void *ctx, void *(*func)(void *)) 213 { 214 pthread_attr_t attr; 215 pthread_t childid; 216 int err; 217 218 if (process_mode) { 219 /* process mode */ 220 /* Fork the receiver. */ 221 switch (fork()) { 222 case -1: barf("fork()"); 223 case 0: 224 (*func) (ctx); 225 exit(0); 226 } 227 228 return (pthread_t) 0; 229 } 230 231 if (pthread_attr_init(&attr) != 0) 232 barf("pthread_attr_init:"); 233 234 #ifndef __ia64__ 235 if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) 236 barf("pthread_attr_setstacksize"); 237 #endif 238 239 if ((err=pthread_create(&childid, &attr, func, ctx)) != 0) { 240 fprintf(stderr, "pthread_create failed: %s (%d)\n", strerror(err), err); 241 exit(-1); 242 } 243 return (childid); 244 } 245 246 void reap_worker(pthread_t id) 247 { 248 int status; 249 250 if (process_mode) { 251 /* process mode */ 252 wait(&status); 253 if (!WIFEXITED(status)) 254 exit(1); 255 } else { 256 void *status; 257 258 pthread_join(id, &status); 259 } 260 } 261 262 /* One group of senders and receivers */ 263 static unsigned int group(pthread_t *pth, 264 unsigned int num_fds, 265 int ready_out, 266 int wakefd) 267 { 268 unsigned int i; 269 struct sender_context* snd_ctx = malloc (sizeof(struct sender_context) 270 +num_fds*sizeof(int)); 271 272 for (i = 0; i < num_fds; i++) { 273 int fds[2]; 274 struct receiver_context* ctx = malloc (sizeof(*ctx)); 275 276 if (!ctx) 277 barf("malloc()"); 278 279 280 /* Create the pipe between client and server */ 281 fdpair(fds); 282 283 ctx->num_packets = num_fds*loops; 284 ctx->in_fds[0] = fds[0]; 285 ctx->in_fds[1] = fds[1]; 286 ctx->ready_out = ready_out; 287 ctx->wakefd = wakefd; 288 289 pth[i] = create_worker(ctx, (void *)(void *)receiver); 290 291 snd_ctx->out_fds[i] = fds[1]; 292 if (process_mode) 293 close(fds[0]); 294 } 295 296 /* Now we have all the fds, fork the senders */ 297 for (i = 0; i < num_fds; i++) { 298 snd_ctx->ready_out = ready_out; 299 snd_ctx->wakefd = wakefd; 300 snd_ctx->num_fds = num_fds; 301 302 pth[num_fds+i] = create_worker(snd_ctx, (void *)(void *)sender); 303 } 304 305 /* Close the fds we have left */ 306 if (process_mode) 307 for (i = 0; i < num_fds; i++) 308 close(snd_ctx->out_fds[i]); 309 310 /* Return number of children to reap */ 311 return num_fds * 2; 312 } 313 314 int main(int argc, char *argv[]) 315 { 316 unsigned int i, num_groups = 10, total_children; 317 struct timeval start, stop, diff; 318 unsigned int num_fds = 20; 319 int readyfds[2], wakefds[2]; 320 char dummy; 321 pthread_t *pth_tab; 322 323 if (argv[1] && strcmp(argv[1], "-pipe") == 0) { 324 use_pipes = 1; 325 argc--; 326 argv++; 327 } 328 329 if (argc >= 2 && (num_groups = atoi(argv[1])) == 0) 330 print_usage_exit(); 331 332 printf("Running with %d*40 (== %d) tasks.\n", 333 num_groups, num_groups*40); 334 335 fflush(NULL); 336 337 if (argc > 2) { 338 if ( !strcmp(argv[2], "process") ) 339 process_mode = 1; 340 else if ( !strcmp(argv[2], "thread") ) 341 process_mode = 0; 342 else 343 print_usage_exit(); 344 } 345 346 if (argc > 3) 347 loops = atoi(argv[3]); 348 349 pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); 350 351 if (!pth_tab) 352 barf("main:malloc()"); 353 354 fdpair(readyfds); 355 fdpair(wakefds); 356 357 total_children = 0; 358 for (i = 0; i < num_groups; i++) 359 total_children += group(pth_tab+total_children, num_fds, readyfds[1], wakefds[0]); 360 361 /* Wait for everyone to be ready */ 362 for (i = 0; i < total_children; i++) 363 if (read(readyfds[0], &dummy, 1) != 1) 364 barf("Reading for readyfds"); 365 366 gettimeofday(&start, NULL); 367 368 /* Kick them off */ 369 if (write(wakefds[1], &dummy, 1) != 1) 370 barf("Writing to start them"); 371 372 /* Reap them all */ 373 for (i = 0; i < total_children; i++) 374 reap_worker(pth_tab[i]); 375 376 gettimeofday(&stop, NULL); 377 378 /* Print time... */ 379 timersub(&stop, &start, &diff); 380 printf("Time: %lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); 381 exit(0); 382 } 383 384 385