Home | History | Annotate | Download | only in mongo
      1 /*
      2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
      3  */
      4 
      5 #include <stdlib.h>
      6 #include <stdio.h>
      7 #include <string.h>
      8 #include <math.h>
      9 #include <sys/types.h>
     10 #include <unistd.h>
     11 #include <sys/stat.h>
     12 #include <fcntl.h>
     13 #include <errno.h>
     14 char tdir[256];
     15 char path[256];
     16 long stats = 0;
     17 
     18 void print_usage()
     19 {
     20 	printf("
     21 This program creates files in a tree of random depth and branching. Files vary
     22 in size randomly according to a distribution function which seems to model real
     23 file systems.  This distribution function has a median size of median_file_size
     24 (Median file size is hypothesized to be proportional to the average per file
     25 space wastage. Notice how that implies that with a more efficient file system
     26 file size usage patterns will in the long term move to a lower median file
     27 size), and a maximum size of max_file_size.  Directories vary in size according
     28 to the same distribution function but with separate parameters to control median
     29 and maximum size for the number of files within them, and the number of
     30 subdirectories within them.  This program prunes some empty subdirectories in a
     31 way that causes parents of leaf directories to branch less than
     32 median_dir_branching.
     33 
     34  To avoid having one large file distort the results such that you have
     35 to benchmark many times set max_file_size to not more than
     36 bytes_to_consume/10.  If maximum/median is a small integer, then
     37 randomness is very poor.  This is a bug, Nikita, please find some
     38 clever way to fix it.  If it is 0, then the program crashes....
     39 
     40 For isolating performance consequences of design variations on
     41 particular file or directory size ranges, try setting their median size and
     42 max_size to both equal the max size of the file size range you want
     43 to test.
     44 
     45 To avoid having one large file distort the results set max_file_size to
     46 not more than bytes_to_consume/10.  Using a distribution function for
     47 the sizes of writes would be a natural next step in developing this program.\n\n");
     48 
     49 	printf
     50 	    ("Usage: reiser_fract_tree bytes_to_consume median_file_size max_file_size median_dir_nr_files max_directory_nr_files median_dir_branching max_dir_branching write_buffer_size /testfs_mount_point print_stats_flag\n\n");
     51 }
     52 
     53 /* #define DEBUG */
     54 
     55 char *write_buffer;		/* buffer from which we write */
     56 int write_buffer_size = 0;	/* gets reset to an argv  */
     57 static int already_whined = 0;	/* keep out of disk space errors from being
     58 				   endless by tracking whether we already
     59 				   printed the message */
     60 long bytes_to_consume = 0;	/* create files until their total number of
     61 				   bytes exceeds this number, but not by more
     62 				   than 1/10th */
     63 long byte_total = 0;		/* bytes created so far */
     64 
     65 /* statistics on sizes of files we attempted to create */
     66 int fsz_0_100 = 0;
     67 int fsz_100_1k = 0;
     68 int fsz_1k_10k = 0;
     69 int fsz_10k_100k = 0;
     70 int fsz_100k_1m = 0;
     71 int fsz_1m_10m = 0;
     72 int fsz_10m_larger = 0;
     73 
     74 void chngdir(char *name)
     75 {
     76 	int i;
     77 
     78 	if (name[0] == '.' && name[1] == '.') {
     79 		for (i = strlen(path); i > 0; i--) {
     80 			if (path[i] == '/') {
     81 				path[i] = 0;
     82 				break;
     83 			}
     84 		}
     85 	} else {
     86 		strcat(path, "/");
     87 		strcat(path, name);
     88 	}
     89 }
     90 
     91 /* this is the core statistical distribution function, and it is used for file
     92    sizes, directory sizes, etc. */
     93 int determine_size(double median_size,
     94 		   double max_size /* The maximal value of size */ )
     95 {
     96 	/* when x is half of its random range (max_size/median_size), result is
     97 	   median_size */
     98 	int nr_random, granularity_reducer;
     99 	double size, double_nr_random;
    100 
    101 	/* it is a feature for us that this repeats identically every time it is run,
    102 	   as otherwise meaningless variances would affect our results and require us
    103 	   to use a higher number of benchmarks to achieve low noise results.  */
    104 	nr_random = rand();
    105 	median_size++;		/* avoids divide by zero errors */
    106 
    107 	/* this code does poorly when max_size is not a lot more than median size,
    108 	   and that needs fixing */
    109 
    110 	/* THE NEXT 2 LINES ARE THE HEART OF THE PROGRAM */
    111 
    112 	/* keep x below the value that when multiplied by median size on the next
    113 	   line will equal max_size */
    114 	/* the granularity_reducer is to handle the case where max_size is near
    115 	   median_size, since '%' can only take ints, we need this complicated what
    116 	   of handling that for small values of max_size/median_size by making
    117 	   large ints out of small ints temporarily.  */
    118 	if (max_size / median_size < 1024)
    119 		granularity_reducer = 1024 * 1024;
    120 	else
    121 		granularity_reducer = 1;
    122 	nr_random =
    123 	    nr_random %
    124 	    ((int)
    125 	     (granularity_reducer *
    126 	      (((double)max_size) / ((double)median_size))));
    127 	double_nr_random = ((double)nr_random) / (granularity_reducer);
    128 	size =
    129 	    median_size * (1 /
    130 			   (1 -
    131 			    (double_nr_random) / (((double)max_size) /
    132 						  ((double)median_size))) - 1);
    133 	return ((int)size);
    134 }
    135 
    136 /* generate a unique filename */
    137 void get_name_by_number(long this_files_number, char *str)
    138 {
    139 	sprintf(str, "%lu", this_files_number);
    140 }
    141 
    142 /* make a file of a specified size */
    143 void make_file(int size)
    144 {
    145 	char string[128] = { 0 };
    146 	char *str = string;
    147 	char fname[256];
    148 	int fd = 0;
    149 	int error;
    150 	static long this_files_number = 1;
    151 
    152 	/* collect statistics about the size of files created, or more precisely, the
    153 	   size of files that we will attempt to create. */
    154 	if (size <= 100)
    155 		fsz_0_100++;
    156 	else if (size <= 1000)
    157 		fsz_100_1k++;
    158 	else if (size <= 10 * 1000)
    159 		fsz_1k_10k++;
    160 	else if (size <= 100 * 1000)
    161 		fsz_10k_100k++;
    162 	else if (size <= 1000 * 1000)
    163 		fsz_100k_1m++;
    164 	else if (size <= 10 * 1000 * 1000)
    165 		fsz_1m_10m++;
    166 	else
    167 		fsz_10m_larger++;
    168 
    169 	/* construct a name for the file */
    170 	get_name_by_number(this_files_number++, str);
    171 	strcpy(fname, path);
    172 	strcat(fname, "/");
    173 	strcat(fname, str);
    174 
    175 	/* open the file, and deal with the various errors that can occur */
    176 
    177 	if ((fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0777)) == -1) {
    178 		if (errno == ENOSPC) {
    179 			if (!already_whined) {
    180 				printf
    181 				    ("reiser-2021A: out of disk (or inodes) space, will keep trying\n");
    182 				already_whined = 1;	/* we continue other file creation in out of
    183 							   space conditions */
    184 			}
    185 			return;
    186 		}
    187 		/*  it is sometimes useful to be able to run this program more than once
    188 		   inside the same directory, and that means skipping over filenames that
    189 		   already exist.  Thus we ignore EEXIST, and pay attention to all
    190 		   else. */
    191 		if (errno == EEXIST) {	/* just skip existing file */
    192 			return;
    193 		}
    194 		perror("open");
    195 		exit(errno);
    196 	}
    197 	/* write to the file until it is the right size, handling the various error
    198 	   conditions appropriately */
    199 
    200 	while (size > 0) {
    201 		size -= (error =
    202 			 write(fd, write_buffer,
    203 			       (size <
    204 				write_buffer_size -
    205 				1) ? size : (write_buffer_size - 1)));
    206 		if (error == -1) {
    207 			if (errno == ENOSPC) {
    208 				if (!already_whined) {
    209 					printf
    210 					    ("reiser-2022: out of disk space, will keep trying\n");
    211 					already_whined = 1;
    212 				}
    213 				close(fd);
    214 				return;
    215 			}
    216 			perror("write() failed");
    217 			exit(errno);
    218 		}
    219 	}
    220 
    221 	/* close the file */
    222 	if (close(fd)) {
    223 		perror("close() failed");
    224 		exit(errno);
    225 	}
    226 }
    227 
    228 /* print the statistics on how many files were created of what size */
    229 
    230 void print_stats()
    231 {
    232 	if (!stats)
    233 		return;
    234 
    235 	printf("\n");
    236 	printf("File stats: Units are decimal (1k = 1000)\n");
    237 	printf("files 0-100    : %i\n", fsz_0_100);
    238 	printf("files 100-1K   : %i\n", fsz_100_1k);
    239 	printf("files 1K-10K   : %i\n", fsz_1k_10k);
    240 	printf("files 10K-100K : %i\n", fsz_10k_100k);
    241 	printf("files 100K-1M  : %i\n", fsz_100k_1m);
    242 	printf("files 1M-10M    : %i\n", fsz_1m_10m);
    243 	printf("files 10M-larger    : %i\n", fsz_10m_larger);
    244 	printf("total bytes written    : %lu\n", byte_total);
    245 
    246 }
    247 
    248 /* predict the number of files that will be created before max_bytes total
    249    length of files is reached */
    250 long determine_nr_of_files(int median_file_size, double max_file_size,
    251 			   long bytes_to_consume)
    252 {
    253 	long nr_of_files = 0, byte_total = 0;
    254 
    255 	/* the next line is not necessary as 1 is the default, it is just cautious
    256 	   coding */
    257 	srand(1);
    258 	while (byte_total < bytes_to_consume) {
    259 		byte_total += determine_size(median_file_size, max_file_size);
    260 		nr_of_files++;
    261 	}
    262 	/* reset the random number generator so that when we determine_size() of the
    263 	   files later they will be created with the same "random" sequence used in
    264 	   this calculation */
    265 	srand(1);
    266 #ifdef DEBUG
    267 	printf("number of files is %d\n", (int)nr_of_files);
    268 #endif /* DEBUG */
    269 	fflush(NULL);
    270 	return nr_of_files;
    271 }
    272 
    273 /* fill the current working directory with nr_files_this_directory number of
    274    files*/
    275 
    276 void fill_this_directory(long nr_files_this_directory, long median_file_size,
    277 			 long maximum_size)
    278 {
    279 	long size;
    280 
    281 #ifdef DEBUG
    282 	printf("filling with %lu files, ", nr_files_this_directory);
    283 #endif
    284 	while (nr_files_this_directory--) {
    285 		size = determine_size(median_file_size, maximum_size);
    286 		byte_total += size;
    287 		make_file(size);
    288 	}
    289 }
    290 
    291 /* this will unfortunately handle out of disk space by forever trying */
    292 /* What we should do in out of space situaltion ? I think we must skip this
    293    directory and continue files/dirs creation process. Error value (!= 0)
    294    indicates that we can't go to this directory. -zam */
    295 int make_directory(char *dirname)
    296 {
    297 	static long this_directory_number = 0;
    298 
    299 	strcpy(tdir, path);
    300 	strcat(tdir, "/");
    301 	strcat(tdir, dirname);
    302 
    303 	if (mkdir(tdir, 0755) == -1) {
    304 		if (errno == ENOSPC) {
    305 			if (!already_whined) {
    306 				printf("reiser-2021: out of disk space, ");
    307 				already_whined = 1;
    308 			}
    309 			return errno;
    310 		}
    311 		/*  it is sometimes useful to be able to run this program more than once
    312 		   inside the same directory, and that means skipping over filenames that
    313 		   already exist.  Thus we ignore EEXIST, and pay attention to all else. */
    314 		if (errno != EEXIST) {
    315 			perror("mkdir");
    316 			exit(errno);
    317 		}
    318 	}
    319 	sprintf(dirname, "d%lu", this_directory_number++);
    320 	strcpy(tdir, path);
    321 	strcat(tdir, "/");
    322 	strcat(tdir, dirname);
    323 
    324 	return 0;
    325 }
    326 
    327 /* assumes we are already chdir'd into a directory that the subtree is rooted
    328    at.  Fills the directory with files and subdirectories, cd's into those
    329    subdirectories, and recurses upon itself */
    330 
    331 void do_subtree(
    332 		       /* the start and end of the portion of the directory sizes
    333 		          array which corresponds to the sizes of the directories
    334 		          composing this subtree */
    335 		       /* sizes_end minus sizes_start is equal to the number of
    336 		          directories in this subtree */
    337 		       long *sizes_start, long *sizes_end,
    338 		       long median_file_size, long maximum_file_size,
    339 		       long median_dir_branching, long max_dir_branching)
    340 {
    341 	long *p;
    342 	long *sub_start;
    343 	long *sub_end;
    344 	int index_subdirectory_to_add_directory_to;
    345 	long *dirs_in_subtrees;
    346 	char *subtree_name;
    347 	long *sizes_index = sizes_start;
    348 	char subtree_name_array[128];
    349 	long this_directory_branching;
    350 	static long this_directorys_number;
    351 
    352 	subtree_name = subtree_name_array;
    353 	/* fill this directory with its number of files */
    354 	fill_this_directory(*sizes_index, median_file_size, maximum_file_size);
    355 	sizes_index++;
    356 	/* ok, now randomly assign directories (and their number of files) among the
    357 	   subdirectories that will be created if at least one directory is assigned
    358 	   to it */
    359 
    360 	/* this will cause the random number sequence to not match the one used in
    361 	   determine_nr_files() I need to accumulate my values in an array
    362 	   beforehand. I'll code that later.  */
    363 	/* worry about whether 0 or 1 is a problem value */
    364 	this_directory_branching =
    365 	    determine_size(median_dir_branching, max_dir_branching) + 1;
    366 
    367 	/* create an array holding the number of directories assigned to each
    368 	   potential subdirectory */
    369 	dirs_in_subtrees = calloc(this_directory_branching, sizeof(long));
    370 	while (sizes_index <= sizes_end) {
    371 		index_subdirectory_to_add_directory_to =
    372 		    (rand() % this_directory_branching);
    373 		(*
    374 		 (dirs_in_subtrees + index_subdirectory_to_add_directory_to))++;
    375 		sizes_index++;
    376 	}
    377 	/* the +1 is for the fill_directory() we did above */
    378 	sizes_index = sizes_start + 1;
    379 
    380 	/* go through each potential subdirectory, and if at least one directory has
    381 	   been assigned to it, create it and recurse */
    382 	for (p = dirs_in_subtrees;
    383 	     p < (dirs_in_subtrees + this_directory_branching); p++) {
    384 		if (*p) {
    385 			int nocd;
    386 			sprintf(subtree_name, "d%lu", this_directorys_number++);
    387 			nocd = make_directory(subtree_name);
    388 			/* if make_dir.. may fails (in out of space situation), we continue
    389 			   creation process in same dir */
    390 			if (!nocd)
    391 				chngdir(subtree_name);
    392 			sub_start = sizes_index;
    393 			/* the minus one is because *p is the number of elements and arrays start at 0 */
    394 			sub_end = (sizes_index + (*p - 1));
    395 
    396 #ifdef DEBUG
    397 			/* comment this back in if the array logic has you going cross-eyed */
    398 			/*      printf ("sizes_start is %p, sizes_index is %p, sizes_index+p is %p, sizes_end is %p\n", sizes_start, sub_start, sub_end, sizes_end); */
    399 #endif
    400 			do_subtree(sub_start, sub_end, median_file_size,
    401 				   maximum_file_size, median_dir_branching,
    402 				   max_dir_branching);
    403 			if (!nocd)
    404 				chngdir("..");
    405 		}
    406 		sizes_index += *p;
    407 	}
    408 }
    409 
    410 /* We have already determined that nr_files can fit in bytes_to_consume space.
    411    Fill the sizes array with the number of files to be in each directory, and
    412    then call do_subtree to fill the tree with files and directories.  */
    413 
    414 void make_fractal_tree(long median_file_size, long maximum_file_size,
    415 		       long median_dir_nr_files, long max_dir_nr_files,
    416 		       long median_dir_branching, long max_dir_branching,
    417 		       long nr_files)
    418 {
    419 	long *sizes_start;
    420 	long *sizes_end;
    421 	long *sizes_index;
    422 	long remaining_files = nr_files;
    423 
    424 	/* collect together array of directory sizes for whole filesystem.  This
    425 	   cannot easily be done recursively without distorting the directory sizes
    426 	   and making deeper directories smaller.  Send me the code if you
    427 	   disagree.:-) */
    428 	/* we almost certainly don't need this much space, but so what.... */
    429 	sizes_index = sizes_start = malloc(nr_files * sizeof(long));
    430 	for (; remaining_files > 0;) {
    431 		*sizes_index =
    432 		    determine_size(median_dir_nr_files, max_dir_nr_files);
    433 		// we alloc space for nr_files, so we should avoid
    434 		// number of files in directory = 0 -grev.
    435 		if (*sizes_index == 0)
    436 			*sizes_index = 1;
    437 		*sizes_index =
    438 		    (*sizes_index <
    439 		     remaining_files) ? *sizes_index : remaining_files;
    440 
    441 #ifdef DEBUG
    442 		printf("*sizes_index == %lu, ", *sizes_index);
    443 #endif
    444 		remaining_files -= *sizes_index;
    445 		sizes_index++;
    446 	}
    447 	/* don't decrement below sizes_start if nr_files is 0 */
    448 	sizes_end = (sizes_index-- > sizes_start) ? sizes_index : sizes_start;
    449 
    450 	sizes_index = sizes_start;
    451 	srand(1);
    452 	do_subtree(sizes_start, sizes_end, median_file_size, maximum_file_size,
    453 		   median_dir_branching, max_dir_branching);
    454 
    455 }
    456 
    457 int main(int argc, char *argv[])
    458 {
    459 	/* initialized from argv[] */
    460 	long median_file_size,
    461 	    median_dir_branching,
    462 	    median_dir_nr_files,
    463 	    max_dir_nr_files, max_dir_branching, max_file_size;
    464 	long nr_of_files = 0;	/* files to be created */
    465 
    466 	if (argc != 11) {
    467 		print_usage();
    468 		exit(1);
    469 	}
    470 
    471 	write_buffer_size = atoi(argv[8]);
    472 	write_buffer = malloc(write_buffer_size);
    473 	memset(write_buffer, 'a', write_buffer_size);
    474 
    475 	/* the number of bytes that we desire this tree to consume.  It will actually
    476 	   consume more, because the last file will overshoot by a random amount, and
    477 	   because the directories and metadata will consume space.  */
    478 	bytes_to_consume = atol(argv[1]);
    479 	max_file_size = atol(argv[3]);
    480 	median_file_size = atol(argv[2]);
    481 	/* Figure out how many random files will fit into bytes_to_consume bytes. We
    482 	   depend on resetting rand() to get the same result later. */
    483 	nr_of_files =
    484 	    determine_nr_of_files(median_file_size, max_file_size,
    485 				  bytes_to_consume);
    486 
    487 	strcpy(path, argv[9]);
    488 	mkdir(path, 0755);
    489 	stats = atol(argv[10]);
    490 	median_dir_branching = atol(argv[6]);
    491 	max_dir_branching = atol(argv[7]);
    492 	median_dir_nr_files = atol(argv[4]);
    493 	max_dir_nr_files = atol(argv[5]);
    494 	make_fractal_tree(median_file_size, max_file_size, median_dir_nr_files,
    495 			  max_dir_nr_files, median_dir_branching,
    496 			  max_dir_branching, nr_of_files);
    497 	print_stats();
    498 	if (stats)
    499 		printf("\nreiser_fract_tree finished\n");
    500 
    501 	return 0;
    502 }
    503