Home | History | Annotate | Download | only in ebizzy-0.3
      1 /*
      2  * Ebizzy - replicate a large ebusiness type of workload.
      3  *
      4  * Written by Valerie Henson <val (at) nmt.edu>
      5  *
      6  * Copyright 2006 - 2007 Intel Corporation
      7  * Copyright 2007 Valerie Henson <val (at) nmt.edu>
      8  *
      9  * Rodrigo Rubira Branco <rrbranco (at) br.ibm.com> - HP/BSD/Solaris port and some
     10  *  						 new features
     11  *
     12  * This program is free software; you can redistribute it and/or modify
     13  * it under the terms of the GNU General Public License as published by
     14  * the Free Software Foundation; version 2 of the License.
     15  *
     16  * This program is distributed in the hope that it will be useful,
     17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     19  * GNU General Public License for more details.
     20  *
     21  * You should have received a copy of the GNU General Public License
     22  * along with this program; if not, write to the Free Software
     23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
     24  * USA
     25  *
     26  */
     27 
     28 /*
     29  * This program is designed to replicate a common web search app
     30  * workload.  A lot of search applications have the basic pattern: Get
     31  * a request to find a certain record, index into the chunk of memory
     32  * that contains it, copy it into another chunk, then look it up via
     33  * binary search.  The interesting parts of this workload are:
     34  *
     35  * Large working set
     36  * Data alloc/copy/free cycle
     37  * Unpredictable data access patterns
     38  *
     39  * Fiddle with the command line options until you get something
     40  * resembling the kind of workload you want to investigate.
     41  *
     42  */
     43 
     44 #include <stdio.h>
     45 #include <unistd.h>
     46 #include <stdlib.h>
     47 #include <sys/mman.h>
     48 #include <pthread.h>
     49 #include <string.h>
     50 #include <time.h>
     51 #include <sys/time.h>
     52 #include <sys/resource.h>
     53 
     54 #include "ebizzy.h"
     55 
     56 /*
     57  * Command line options
     58  */
     59 
     60 static unsigned int always_mmap;
     61 static unsigned int never_mmap;
     62 static unsigned int chunks;
     63 static unsigned int use_permissions;
     64 static unsigned int use_holes;
     65 static unsigned int random_size;
     66 static unsigned int chunk_size;
     67 static unsigned int seconds;
     68 static unsigned int threads;
     69 static unsigned int verbose;
     70 static unsigned int linear;
     71 static unsigned int touch_pages;
     72 static unsigned int no_lib_memcpy;
     73 
     74 /*
     75  * Other global variables
     76  */
     77 
     78 typedef size_t record_t;
     79 static unsigned int record_size = sizeof(record_t);
     80 static char *cmd;
     81 static record_t **mem;
     82 static char **hole_mem;
     83 static unsigned int page_size;
     84 static time_t start_time;
     85 static volatile int threads_go;
     86 static unsigned int records_read;
     87 
     88 static void usage(void)
     89 {
     90 	fprintf(stderr, "Usage: %s [options]\n"
     91 		"-T\t\t Just 'touch' the allocated pages\n"
     92 		"-l\t\t Don't use library memcpy\n"
     93 		"-m\t\t Always use mmap instead of malloc\n"
     94 		"-M\t\t Never use mmap\n"
     95 		"-n <num>\t Number of memory chunks to allocate\n"
     96 		"-p \t\t Prevent mmap coalescing using permissions\n"
     97 		"-P \t\t Prevent mmap coalescing using holes\n"
     98 		"-R\t\t Randomize size of memory to copy and search\n"
     99 		"-s <size>\t Size of memory chunks, in bytes\n"
    100 		"-S <seconds>\t Number of seconds to run\n"
    101 		"-t <num>\t Number of threads (2 * number cpus by default)\n"
    102 		"-v[v[v]]\t Be verbose (more v's for more verbose)\n"
    103 		"-z\t\t Linear search instead of binary search\n", cmd);
    104 	exit(1);
    105 }
    106 
    107 /*
    108  * Read options, check them, and set some defaults.
    109  */
    110 
    111 static void read_options(int argc, char *argv[])
    112 {
    113 	int c;
    114 
    115 	page_size = getpagesize();
    116 
    117 	/*
    118 	 * Set some defaults.  These are currently tuned to run in a
    119 	 * reasonable amount of time on my laptop.
    120 	 *
    121 	 * We could set the static defaults in the declarations, but
    122 	 * then the defaults would be split between here and the top
    123 	 * of the file, which is annoying.
    124 	 */
    125 	threads = 2 * sysconf(_SC_NPROCESSORS_ONLN);
    126 	chunks = 10;
    127 	chunk_size = record_size * 64 * 1024;
    128 	seconds = 10;
    129 
    130 	/* On to option processing */
    131 
    132 	cmd = argv[0];
    133 	opterr = 1;
    134 
    135 	while ((c = getopt(argc, argv, "lmMn:pPRs:S:t:vzT")) != -1) {
    136 		switch (c) {
    137 		case 'l':
    138 			no_lib_memcpy = 1;
    139 			break;
    140 		case 'm':
    141 			always_mmap = 1;
    142 			break;
    143 		case 'M':
    144 			never_mmap = 1;
    145 			break;
    146 		case 'n':
    147 			chunks = atoi(optarg);
    148 			if (chunks == 0)
    149 				usage();
    150 			break;
    151 		case 'p':
    152 			use_permissions = 1;
    153 			break;
    154 		case 'P':
    155 			use_holes = 1;
    156 			break;
    157 		case 'R':
    158 			random_size = 1;
    159 			break;
    160 		case 's':
    161 			chunk_size = atoi(optarg);
    162 			if (chunk_size == 0)
    163 				usage();
    164 			break;
    165 		case 'S':
    166 			seconds = atoi(optarg);
    167 			if (seconds == 0)
    168 				usage();
    169 			break;
    170 		case 't':
    171 			threads = atoi(optarg);
    172 			if (threads == 0)
    173 				usage();
    174 			break;
    175 		case 'T':
    176 			touch_pages = 1;
    177 			break;
    178 		case 'v':
    179 			verbose++;
    180 			break;
    181 		case 'z':
    182 			linear = 1;
    183 			break;
    184 		default:
    185 			usage();
    186 		}
    187 	}
    188 
    189 	if (verbose)
    190 		printf("ebizzy 0.2\n"
    191 		       "(C) 2006-7 Intel Corporation\n"
    192 		       "(C) 2007 Valerie Henson <val (at) nmt.edu>\n");
    193 
    194 	if (verbose) {
    195 		printf("always_mmap %u\n", always_mmap);
    196 		printf("never_mmap %u\n", never_mmap);
    197 		printf("chunks %u\n", chunks);
    198 		printf("prevent coalescing using permissions %u\n",
    199 		       use_permissions);
    200 		printf("prevent coalescing using holes %u\n", use_holes);
    201 		printf("random_size %u\n", random_size);
    202 		printf("chunk_size %u\n", chunk_size);
    203 		printf("seconds %d\n", seconds);
    204 		printf("threads %u\n", threads);
    205 		printf("verbose %u\n", verbose);
    206 		printf("linear %u\n", linear);
    207 		printf("touch_pages %u\n", touch_pages);
    208 		printf("page size %d\n", page_size);
    209 	}
    210 
    211 	/* Check for incompatible options */
    212 
    213 	if (always_mmap && never_mmap) {
    214 		fprintf(stderr, "Both -m \"always mmap\" and -M "
    215 			"\"never mmap\" option specified\n");
    216 		usage();
    217 	}
    218 
    219 	if (never_mmap)
    220 		mallopt(M_MMAP_MAX, 0);
    221 
    222 	if (chunk_size < record_size) {
    223 		fprintf(stderr, "Chunk size %u smaller than record size %u\n",
    224 			chunk_size, record_size);
    225 		usage();
    226 	}
    227 }
    228 
    229 static void touch_mem(char *dest, size_t size)
    230 {
    231 	int i;
    232 	if (touch_pages) {
    233 		for (i = 0; i < size; i += page_size)
    234 			*(dest + i) = 0xff;
    235 	}
    236 }
    237 
    238 static void *alloc_mem(size_t size)
    239 {
    240 	char *p;
    241 	int err = 0;
    242 
    243 	if (always_mmap) {
    244 		p = mmap(NULL, size, (PROT_READ | PROT_WRITE),
    245 			 (MAP_PRIVATE | MAP_ANONYMOUS), -1, 0);
    246 		if (p == MAP_FAILED)
    247 			err = 1;
    248 	} else {
    249 		p = malloc(size);
    250 		if (p == NULL)
    251 			err = 1;
    252 	}
    253 
    254 	if (err) {
    255 		fprintf(stderr, "Couldn't allocate %zu bytes, try smaller "
    256 			"chunks or size options\n"
    257 			"Using -n %u chunks and -s %u size\n",
    258 			size, chunks, chunk_size);
    259 		exit(1);
    260 	}
    261 
    262 	return (p);
    263 }
    264 
    265 static void free_mem(void *p, size_t size)
    266 {
    267 	if (always_mmap)
    268 		munmap(p, size);
    269 	else
    270 		free(p);
    271 }
    272 
    273 /*
    274  * Factor out differences in memcpy implementation by optionally using
    275  * our own simple memcpy implementation.
    276  */
    277 
    278 static void my_memcpy(void *dest, void *src, size_t len)
    279 {
    280 	char *d = (char *)dest;
    281 	char *s = (char *)src;
    282 	int i;
    283 
    284 	for (i = 0; i < len; i++)
    285 		d[i] = s[i];
    286 	return;
    287 }
    288 
    289 static void allocate(void)
    290 {
    291 	int i;
    292 
    293 	mem = alloc_mem(chunks * sizeof(record_t *));
    294 
    295 	if (use_holes)
    296 		hole_mem = alloc_mem(chunks * sizeof(record_t *));
    297 
    298 	for (i = 0; i < chunks; i++) {
    299 		mem[i] = (record_t *) alloc_mem(chunk_size);
    300 		/* Prevent coalescing using holes */
    301 		if (use_holes)
    302 			hole_mem[i] = alloc_mem(page_size);
    303 	}
    304 
    305 	/* Free hole memory */
    306 	if (use_holes)
    307 		for (i = 0; i < chunks; i++)
    308 			free_mem(hole_mem[i], page_size);
    309 
    310 	if (verbose)
    311 		printf("Allocated memory\n");
    312 }
    313 
    314 static void write_pattern(void)
    315 {
    316 	int i, j;
    317 
    318 	for (i = 0; i < chunks; i++) {
    319 		for (j = 0; j < chunk_size / record_size; j++)
    320 			mem[i][j] = (record_t) j;
    321 		/* Prevent coalescing by alternating permissions */
    322 		if (use_permissions && (i % 2) == 0)
    323 			mprotect((void *)mem[i], chunk_size, PROT_READ);
    324 	}
    325 	if (verbose)
    326 		printf("Wrote memory\n");
    327 }
    328 
    329 static void *linear_search(record_t key, record_t * base, size_t size)
    330 {
    331 	record_t *p;
    332 	record_t *end = base + (size / record_size);
    333 
    334 	for (p = base; p < end; p++)
    335 		if (*p == key)
    336 			return p;
    337 	return NULL;
    338 }
    339 
    340 static int compare(const void *p1, const void *p2)
    341 {
    342 	return (*(record_t *) p1 - *(record_t *) p2);
    343 }
    344 
    345 /*
    346  * Stupid ranged random number function.  We don't care about quality.
    347  *
    348  * Inline because it's starting to be a scaling issue.
    349  */
    350 
    351 static inline unsigned int rand_num(unsigned int max, unsigned int *state)
    352 {
    353 	*state *= 1103515245 + 12345;
    354 	return ((*state / 65536) % max);
    355 }
    356 
    357 /*
    358  * This function is the meat of the program; the rest is just support.
    359  *
    360  * In this function, we randomly select a memory chunk, copy it into a
    361  * newly allocated buffer, randomly select a search key, look it up,
    362  * then free the memory.  An option tells us to allocate and copy a
    363  * randomly sized chunk of the memory instead of the whole thing.
    364  *
    365  * Linear search provided for sanity checking.
    366  *
    367  */
    368 
    369 static unsigned int search_mem(void)
    370 {
    371 	record_t key, *found;
    372 	record_t *src, *copy;
    373 	unsigned int chunk;
    374 	size_t copy_size = chunk_size;
    375 	unsigned int i;
    376 	unsigned int state = 0;
    377 
    378 	for (i = 0; threads_go == 1; i++) {
    379 		chunk = rand_num(chunks, &state);
    380 		src = mem[chunk];
    381 		/*
    382 		 * If we're doing random sizes, we need a non-zero
    383 		 * multiple of record size.
    384 		 */
    385 		if (random_size)
    386 			copy_size = (rand_num(chunk_size / record_size, &state)
    387 				     + 1) * record_size;
    388 		copy = alloc_mem(copy_size);
    389 
    390 		if (touch_pages) {
    391 			touch_mem((char *)copy, copy_size);
    392 		} else {
    393 
    394 			if (no_lib_memcpy)
    395 				my_memcpy(copy, src, copy_size);
    396 			else
    397 				memcpy(copy, src, copy_size);
    398 
    399 			key = rand_num(copy_size / record_size, &state);
    400 
    401 			if (verbose > 2)
    402 				printf("Search key %zu, copy size %zu\n", key,
    403 				       copy_size);
    404 			if (linear)
    405 				found = linear_search(key, copy, copy_size);
    406 			else
    407 				found =
    408 				    bsearch(&key, copy, copy_size / record_size,
    409 					    record_size, compare);
    410 
    411 			/* Below check is mainly for memory corruption or other bug */
    412 			if (found == NULL) {
    413 				fprintf(stderr, "Couldn't find key %zd\n", key);
    414 				exit(1);
    415 			}
    416 		}		/* end if ! touch_pages */
    417 
    418 		free_mem(copy, copy_size);
    419 	}
    420 
    421 	return (i);
    422 }
    423 
    424 static void *thread_run(void *arg)
    425 {
    426 
    427 	if (verbose > 1)
    428 		printf("Thread started\n");
    429 
    430 	/* Wait for the start signal */
    431 
    432 	while (threads_go == 0) ;
    433 
    434 	records_read += search_mem();
    435 
    436 	if (verbose > 1)
    437 		printf("Thread finished, %f seconds\n",
    438 		       difftime(time(NULL), start_time));
    439 
    440 	return NULL;
    441 }
    442 
    443 static struct timeval difftimeval(struct timeval *end, struct timeval *start)
    444 {
    445 	struct timeval diff;
    446 	diff.tv_sec = end->tv_sec - start->tv_sec;
    447 	diff.tv_usec = end->tv_usec - start->tv_usec;
    448 	return diff;
    449 }
    450 
    451 static void start_threads(void)
    452 {
    453 	pthread_t thread_array[threads];
    454 	double elapsed;
    455 	unsigned int i;
    456 	struct rusage start_ru, end_ru;
    457 	struct timeval usr_time, sys_time;
    458 	int err;
    459 
    460 	if (verbose)
    461 		printf("Threads starting\n");
    462 
    463 	for (i = 0; i < threads; i++) {
    464 		err = pthread_create(&thread_array[i], NULL, thread_run, NULL);
    465 		if (err) {
    466 			fprintf(stderr, "Error creating thread %d\n", i);
    467 			exit(1);
    468 		}
    469 	}
    470 
    471 	/*
    472 	 * Begin accounting - this is when we actually do the things
    473 	 * we want to measure. */
    474 
    475 	getrusage(RUSAGE_SELF, &start_ru);
    476 	start_time = time(NULL);
    477 	threads_go = 1;
    478 	sleep(seconds);
    479 	threads_go = 0;
    480 	elapsed = difftime(time(NULL), start_time);
    481 	getrusage(RUSAGE_SELF, &end_ru);
    482 
    483 	/*
    484 	 * The rest is just clean up.
    485 	 */
    486 
    487 	for (i = 0; i < threads; i++) {
    488 		err = pthread_join(thread_array[i], NULL);
    489 		if (err) {
    490 			fprintf(stderr, "Error joining thread %d\n", i);
    491 			exit(1);
    492 		}
    493 	}
    494 
    495 	if (verbose)
    496 		printf("Threads finished\n");
    497 
    498 	printf("%u records/s\n",
    499 	       (unsigned int)(((double)records_read) / elapsed));
    500 
    501 	usr_time = difftimeval(&end_ru.ru_utime, &start_ru.ru_utime);
    502 	sys_time = difftimeval(&end_ru.ru_stime, &start_ru.ru_stime);
    503 
    504 	printf("real %5.2f s\n", elapsed);
    505 	printf("user %5.2f s\n", usr_time.tv_sec + usr_time.tv_usec / 1e6);
    506 	printf("sys  %5.2f s\n", sys_time.tv_sec + sys_time.tv_usec / 1e6);
    507 }
    508 
    509 int main(int argc, char *argv[])
    510 {
    511 	read_options(argc, argv);
    512 
    513 	allocate();
    514 
    515 	write_pattern();
    516 
    517 	start_threads();
    518 
    519 	return 0;
    520 }
    521