1 /* 2 * Ebizzy - replicate a large ebusiness type of workload. 3 * 4 * Written by Valerie Henson <val (at) nmt.edu> 5 * 6 * Copyright 2006 - 2007 Intel Corporation 7 * Copyright 2007 Valerie Henson <val (at) nmt.edu> 8 * 9 * Rodrigo Rubira Branco <rrbranco (at) br.ibm.com> - HP/BSD/Solaris port and some 10 * new features 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License as published by 14 * the Free Software Foundation; version 2 of the License. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 24 * USA 25 * 26 */ 27 28 /* 29 * This program is designed to replicate a common web search app 30 * workload. A lot of search applications have the basic pattern: Get 31 * a request to find a certain record, index into the chunk of memory 32 * that contains it, copy it into another chunk, then look it up via 33 * binary search. The interesting parts of this workload are: 34 * 35 * Large working set 36 * Data alloc/copy/free cycle 37 * Unpredictable data access patterns 38 * 39 * Fiddle with the command line options until you get something 40 * resembling the kind of workload you want to investigate. 41 * 42 */ 43 44 #include <stdio.h> 45 #include <unistd.h> 46 #include <stdlib.h> 47 #include <sys/mman.h> 48 #include <pthread.h> 49 #include <string.h> 50 #include <time.h> 51 #include <sys/time.h> 52 #include <sys/resource.h> 53 54 #include "ebizzy.h" 55 56 /* 57 * Command line options 58 */ 59 60 static unsigned int always_mmap; 61 static unsigned int never_mmap; 62 static unsigned int chunks; 63 static unsigned int use_permissions; 64 static unsigned int use_holes; 65 static unsigned int random_size; 66 static unsigned int chunk_size; 67 static unsigned int seconds; 68 static unsigned int threads; 69 static unsigned int verbose; 70 static unsigned int linear; 71 static unsigned int touch_pages; 72 static unsigned int no_lib_memcpy; 73 74 /* 75 * Other global variables 76 */ 77 78 typedef size_t record_t; 79 static unsigned int record_size = sizeof(record_t); 80 static char *cmd; 81 static record_t **mem; 82 static char **hole_mem; 83 static unsigned int page_size; 84 static time_t start_time; 85 static volatile int threads_go; 86 static unsigned int records_read; 87 88 static void usage(void) 89 { 90 fprintf(stderr, "Usage: %s [options]\n" 91 "-T\t\t Just 'touch' the allocated pages\n" 92 "-l\t\t Don't use library memcpy\n" 93 "-m\t\t Always use mmap instead of malloc\n" 94 "-M\t\t Never use mmap\n" 95 "-n <num>\t Number of memory chunks to allocate\n" 96 "-p \t\t Prevent mmap coalescing using permissions\n" 97 "-P \t\t Prevent mmap coalescing using holes\n" 98 "-R\t\t Randomize size of memory to copy and search\n" 99 "-s <size>\t Size of memory chunks, in bytes\n" 100 "-S <seconds>\t Number of seconds to run\n" 101 "-t <num>\t Number of threads (2 * number cpus by default)\n" 102 "-v[v[v]]\t Be verbose (more v's for more verbose)\n" 103 "-z\t\t Linear search instead of binary search\n", cmd); 104 exit(1); 105 } 106 107 /* 108 * Read options, check them, and set some defaults. 109 */ 110 111 static void read_options(int argc, char *argv[]) 112 { 113 int c; 114 115 page_size = getpagesize(); 116 117 /* 118 * Set some defaults. These are currently tuned to run in a 119 * reasonable amount of time on my laptop. 120 * 121 * We could set the static defaults in the declarations, but 122 * then the defaults would be split between here and the top 123 * of the file, which is annoying. 124 */ 125 threads = 2 * sysconf(_SC_NPROCESSORS_ONLN); 126 chunks = 10; 127 chunk_size = record_size * 64 * 1024; 128 seconds = 10; 129 130 /* On to option processing */ 131 132 cmd = argv[0]; 133 opterr = 1; 134 135 while ((c = getopt(argc, argv, "lmMn:pPRs:S:t:vzT")) != -1) { 136 switch (c) { 137 case 'l': 138 no_lib_memcpy = 1; 139 break; 140 case 'm': 141 always_mmap = 1; 142 break; 143 case 'M': 144 never_mmap = 1; 145 break; 146 case 'n': 147 chunks = atoi(optarg); 148 if (chunks == 0) 149 usage(); 150 break; 151 case 'p': 152 use_permissions = 1; 153 break; 154 case 'P': 155 use_holes = 1; 156 break; 157 case 'R': 158 random_size = 1; 159 break; 160 case 's': 161 chunk_size = atoi(optarg); 162 if (chunk_size == 0) 163 usage(); 164 break; 165 case 'S': 166 seconds = atoi(optarg); 167 if (seconds == 0) 168 usage(); 169 break; 170 case 't': 171 threads = atoi(optarg); 172 if (threads == 0) 173 usage(); 174 break; 175 case 'T': 176 touch_pages = 1; 177 break; 178 case 'v': 179 verbose++; 180 break; 181 case 'z': 182 linear = 1; 183 break; 184 default: 185 usage(); 186 } 187 } 188 189 if (verbose) 190 printf("ebizzy 0.2\n" 191 "(C) 2006-7 Intel Corporation\n" 192 "(C) 2007 Valerie Henson <val (at) nmt.edu>\n"); 193 194 if (verbose) { 195 printf("always_mmap %u\n", always_mmap); 196 printf("never_mmap %u\n", never_mmap); 197 printf("chunks %u\n", chunks); 198 printf("prevent coalescing using permissions %u\n", 199 use_permissions); 200 printf("prevent coalescing using holes %u\n", use_holes); 201 printf("random_size %u\n", random_size); 202 printf("chunk_size %u\n", chunk_size); 203 printf("seconds %d\n", seconds); 204 printf("threads %u\n", threads); 205 printf("verbose %u\n", verbose); 206 printf("linear %u\n", linear); 207 printf("touch_pages %u\n", touch_pages); 208 printf("page size %d\n", page_size); 209 } 210 211 /* Check for incompatible options */ 212 213 if (always_mmap && never_mmap) { 214 fprintf(stderr, "Both -m \"always mmap\" and -M " 215 "\"never mmap\" option specified\n"); 216 usage(); 217 } 218 219 if (never_mmap) 220 mallopt(M_MMAP_MAX, 0); 221 222 if (chunk_size < record_size) { 223 fprintf(stderr, "Chunk size %u smaller than record size %u\n", 224 chunk_size, record_size); 225 usage(); 226 } 227 } 228 229 static void touch_mem(char *dest, size_t size) 230 { 231 int i; 232 if (touch_pages) { 233 for (i = 0; i < size; i += page_size) 234 *(dest + i) = 0xff; 235 } 236 } 237 238 static void *alloc_mem(size_t size) 239 { 240 char *p; 241 int err = 0; 242 243 if (always_mmap) { 244 p = mmap(NULL, size, (PROT_READ | PROT_WRITE), 245 (MAP_PRIVATE | MAP_ANONYMOUS), -1, 0); 246 if (p == MAP_FAILED) 247 err = 1; 248 } else { 249 p = malloc(size); 250 if (p == NULL) 251 err = 1; 252 } 253 254 if (err) { 255 fprintf(stderr, "Couldn't allocate %zu bytes, try smaller " 256 "chunks or size options\n" 257 "Using -n %u chunks and -s %u size\n", 258 size, chunks, chunk_size); 259 exit(1); 260 } 261 262 return (p); 263 } 264 265 static void free_mem(void *p, size_t size) 266 { 267 if (always_mmap) 268 munmap(p, size); 269 else 270 free(p); 271 } 272 273 /* 274 * Factor out differences in memcpy implementation by optionally using 275 * our own simple memcpy implementation. 276 */ 277 278 static void my_memcpy(void *dest, void *src, size_t len) 279 { 280 char *d = (char *)dest; 281 char *s = (char *)src; 282 int i; 283 284 for (i = 0; i < len; i++) 285 d[i] = s[i]; 286 return; 287 } 288 289 static void allocate(void) 290 { 291 int i; 292 293 mem = alloc_mem(chunks * sizeof(record_t *)); 294 295 if (use_holes) 296 hole_mem = alloc_mem(chunks * sizeof(record_t *)); 297 298 for (i = 0; i < chunks; i++) { 299 mem[i] = (record_t *) alloc_mem(chunk_size); 300 /* Prevent coalescing using holes */ 301 if (use_holes) 302 hole_mem[i] = alloc_mem(page_size); 303 } 304 305 /* Free hole memory */ 306 if (use_holes) 307 for (i = 0; i < chunks; i++) 308 free_mem(hole_mem[i], page_size); 309 310 if (verbose) 311 printf("Allocated memory\n"); 312 } 313 314 static void write_pattern(void) 315 { 316 int i, j; 317 318 for (i = 0; i < chunks; i++) { 319 for (j = 0; j < chunk_size / record_size; j++) 320 mem[i][j] = (record_t) j; 321 /* Prevent coalescing by alternating permissions */ 322 if (use_permissions && (i % 2) == 0) 323 mprotect((void *)mem[i], chunk_size, PROT_READ); 324 } 325 if (verbose) 326 printf("Wrote memory\n"); 327 } 328 329 static void *linear_search(record_t key, record_t * base, size_t size) 330 { 331 record_t *p; 332 record_t *end = base + (size / record_size); 333 334 for (p = base; p < end; p++) 335 if (*p == key) 336 return p; 337 return NULL; 338 } 339 340 static int compare(const void *p1, const void *p2) 341 { 342 return (*(record_t *) p1 - *(record_t *) p2); 343 } 344 345 /* 346 * Stupid ranged random number function. We don't care about quality. 347 * 348 * Inline because it's starting to be a scaling issue. 349 */ 350 351 static inline unsigned int rand_num(unsigned int max, unsigned int *state) 352 { 353 *state *= 1103515245 + 12345; 354 return ((*state / 65536) % max); 355 } 356 357 /* 358 * This function is the meat of the program; the rest is just support. 359 * 360 * In this function, we randomly select a memory chunk, copy it into a 361 * newly allocated buffer, randomly select a search key, look it up, 362 * then free the memory. An option tells us to allocate and copy a 363 * randomly sized chunk of the memory instead of the whole thing. 364 * 365 * Linear search provided for sanity checking. 366 * 367 */ 368 369 static unsigned int search_mem(void) 370 { 371 record_t key, *found; 372 record_t *src, *copy; 373 unsigned int chunk; 374 size_t copy_size = chunk_size; 375 unsigned int i; 376 unsigned int state = 0; 377 378 for (i = 0; threads_go == 1; i++) { 379 chunk = rand_num(chunks, &state); 380 src = mem[chunk]; 381 /* 382 * If we're doing random sizes, we need a non-zero 383 * multiple of record size. 384 */ 385 if (random_size) 386 copy_size = (rand_num(chunk_size / record_size, &state) 387 + 1) * record_size; 388 copy = alloc_mem(copy_size); 389 390 if (touch_pages) { 391 touch_mem((char *)copy, copy_size); 392 } else { 393 394 if (no_lib_memcpy) 395 my_memcpy(copy, src, copy_size); 396 else 397 memcpy(copy, src, copy_size); 398 399 key = rand_num(copy_size / record_size, &state); 400 401 if (verbose > 2) 402 printf("Search key %zu, copy size %zu\n", key, 403 copy_size); 404 if (linear) 405 found = linear_search(key, copy, copy_size); 406 else 407 found = 408 bsearch(&key, copy, copy_size / record_size, 409 record_size, compare); 410 411 /* Below check is mainly for memory corruption or other bug */ 412 if (found == NULL) { 413 fprintf(stderr, "Couldn't find key %zd\n", key); 414 exit(1); 415 } 416 } /* end if ! touch_pages */ 417 418 free_mem(copy, copy_size); 419 } 420 421 return (i); 422 } 423 424 static void *thread_run(void *arg) 425 { 426 427 if (verbose > 1) 428 printf("Thread started\n"); 429 430 /* Wait for the start signal */ 431 432 while (threads_go == 0) ; 433 434 records_read += search_mem(); 435 436 if (verbose > 1) 437 printf("Thread finished, %f seconds\n", 438 difftime(time(NULL), start_time)); 439 440 return NULL; 441 } 442 443 static struct timeval difftimeval(struct timeval *end, struct timeval *start) 444 { 445 struct timeval diff; 446 diff.tv_sec = end->tv_sec - start->tv_sec; 447 diff.tv_usec = end->tv_usec - start->tv_usec; 448 return diff; 449 } 450 451 static void start_threads(void) 452 { 453 pthread_t thread_array[threads]; 454 double elapsed; 455 unsigned int i; 456 struct rusage start_ru, end_ru; 457 struct timeval usr_time, sys_time; 458 int err; 459 460 if (verbose) 461 printf("Threads starting\n"); 462 463 for (i = 0; i < threads; i++) { 464 err = pthread_create(&thread_array[i], NULL, thread_run, NULL); 465 if (err) { 466 fprintf(stderr, "Error creating thread %d\n", i); 467 exit(1); 468 } 469 } 470 471 /* 472 * Begin accounting - this is when we actually do the things 473 * we want to measure. */ 474 475 getrusage(RUSAGE_SELF, &start_ru); 476 start_time = time(NULL); 477 threads_go = 1; 478 sleep(seconds); 479 threads_go = 0; 480 elapsed = difftime(time(NULL), start_time); 481 getrusage(RUSAGE_SELF, &end_ru); 482 483 /* 484 * The rest is just clean up. 485 */ 486 487 for (i = 0; i < threads; i++) { 488 err = pthread_join(thread_array[i], NULL); 489 if (err) { 490 fprintf(stderr, "Error joining thread %d\n", i); 491 exit(1); 492 } 493 } 494 495 if (verbose) 496 printf("Threads finished\n"); 497 498 printf("%u records/s\n", 499 (unsigned int)(((double)records_read) / elapsed)); 500 501 usr_time = difftimeval(&end_ru.ru_utime, &start_ru.ru_utime); 502 sys_time = difftimeval(&end_ru.ru_stime, &start_ru.ru_stime); 503 504 printf("real %5.2f s\n", elapsed); 505 printf("user %5.2f s\n", usr_time.tv_sec + usr_time.tv_usec / 1e6); 506 printf("sys %5.2f s\n", sys_time.tv_sec + sys_time.tv_usec / 1e6); 507 } 508 509 int main(int argc, char *argv[]) 510 { 511 read_options(argc, argv); 512 513 allocate(); 514 515 write_pattern(); 516 517 start_threads(); 518 519 return 0; 520 } 521