1 /* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 #include <stdlib.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include <math.h> 9 #include <sys/types.h> 10 #include <unistd.h> 11 #include <sys/stat.h> 12 #include <fcntl.h> 13 #include <errno.h> 14 char tdir[256]; 15 char path[256]; 16 long stats = 0; 17 18 void print_usage() 19 { 20 printf(" 21 This program creates files in a tree of random depth and branching. Files vary 22 in size randomly according to a distribution function which seems to model real 23 file systems. This distribution function has a median size of median_file_size 24 (Median file size is hypothesized to be proportional to the average per file 25 space wastage. Notice how that implies that with a more efficient file system 26 file size usage patterns will in the long term move to a lower median file 27 size), and a maximum size of max_file_size. Directories vary in size according 28 to the same distribution function but with separate parameters to control median 29 and maximum size for the number of files within them, and the number of 30 subdirectories within them. This program prunes some empty subdirectories in a 31 way that causes parents of leaf directories to branch less than 32 median_dir_branching. 33 34 To avoid having one large file distort the results such that you have 35 to benchmark many times set max_file_size to not more than 36 bytes_to_consume/10. If maximum/median is a small integer, then 37 randomness is very poor. This is a bug, Nikita, please find some 38 clever way to fix it. If it is 0, then the program crashes.... 39 40 For isolating performance consequences of design variations on 41 particular file or directory size ranges, try setting their median size and 42 max_size to both equal the max size of the file size range you want 43 to test. 44 45 To avoid having one large file distort the results set max_file_size to 46 not more than bytes_to_consume/10. Using a distribution function for 47 the sizes of writes would be a natural next step in developing this program.\n\n"); 48 49 printf 50 ("Usage: reiser_fract_tree bytes_to_consume median_file_size max_file_size median_dir_nr_files max_directory_nr_files median_dir_branching max_dir_branching write_buffer_size /testfs_mount_point print_stats_flag\n\n"); 51 } 52 53 /* #define DEBUG */ 54 55 char *write_buffer; /* buffer from which we write */ 56 int write_buffer_size = 0; /* gets reset to an argv */ 57 static int already_whined = 0; /* keep out of disk space errors from being 58 endless by tracking whether we already 59 printed the message */ 60 long bytes_to_consume = 0; /* create files until their total number of 61 bytes exceeds this number, but not by more 62 than 1/10th */ 63 long byte_total = 0; /* bytes created so far */ 64 65 /* statistics on sizes of files we attempted to create */ 66 int fsz_0_100 = 0; 67 int fsz_100_1k = 0; 68 int fsz_1k_10k = 0; 69 int fsz_10k_100k = 0; 70 int fsz_100k_1m = 0; 71 int fsz_1m_10m = 0; 72 int fsz_10m_larger = 0; 73 74 void chngdir(char *name) 75 { 76 int i; 77 78 if (name[0] == '.' && name[1] == '.') { 79 for (i = strlen(path); i > 0; i--) { 80 if (path[i] == '/') { 81 path[i] = 0; 82 break; 83 } 84 } 85 } else { 86 strcat(path, "/"); 87 strcat(path, name); 88 } 89 } 90 91 /* this is the core statistical distribution function, and it is used for file 92 sizes, directory sizes, etc. */ 93 int determine_size(double median_size, 94 double max_size /* The maximal value of size */ ) 95 { 96 /* when x is half of its random range (max_size/median_size), result is 97 median_size */ 98 int nr_random, granularity_reducer; 99 double size, double_nr_random; 100 101 /* it is a feature for us that this repeats identically every time it is run, 102 as otherwise meaningless variances would affect our results and require us 103 to use a higher number of benchmarks to achieve low noise results. */ 104 nr_random = rand(); 105 median_size++; /* avoids divide by zero errors */ 106 107 /* this code does poorly when max_size is not a lot more than median size, 108 and that needs fixing */ 109 110 /* THE NEXT 2 LINES ARE THE HEART OF THE PROGRAM */ 111 112 /* keep x below the value that when multiplied by median size on the next 113 line will equal max_size */ 114 /* the granularity_reducer is to handle the case where max_size is near 115 median_size, since '%' can only take ints, we need this complicated what 116 of handling that for small values of max_size/median_size by making 117 large ints out of small ints temporarily. */ 118 if (max_size / median_size < 1024) 119 granularity_reducer = 1024 * 1024; 120 else 121 granularity_reducer = 1; 122 nr_random = 123 nr_random % 124 ((int) 125 (granularity_reducer * 126 (((double)max_size) / ((double)median_size)))); 127 double_nr_random = ((double)nr_random) / (granularity_reducer); 128 size = 129 median_size * (1 / 130 (1 - 131 (double_nr_random) / (((double)max_size) / 132 ((double)median_size))) - 1); 133 return ((int)size); 134 } 135 136 /* generate a unique filename */ 137 void get_name_by_number(long this_files_number, char *str) 138 { 139 sprintf(str, "%lu", this_files_number); 140 } 141 142 /* make a file of a specified size */ 143 void make_file(int size) 144 { 145 char string[128] = { 0 }; 146 char *str = string; 147 char fname[256]; 148 int fd = 0; 149 int error; 150 static long this_files_number = 1; 151 152 /* collect statistics about the size of files created, or more precisely, the 153 size of files that we will attempt to create. */ 154 if (size <= 100) 155 fsz_0_100++; 156 else if (size <= 1000) 157 fsz_100_1k++; 158 else if (size <= 10 * 1000) 159 fsz_1k_10k++; 160 else if (size <= 100 * 1000) 161 fsz_10k_100k++; 162 else if (size <= 1000 * 1000) 163 fsz_100k_1m++; 164 else if (size <= 10 * 1000 * 1000) 165 fsz_1m_10m++; 166 else 167 fsz_10m_larger++; 168 169 /* construct a name for the file */ 170 get_name_by_number(this_files_number++, str); 171 strcpy(fname, path); 172 strcat(fname, "/"); 173 strcat(fname, str); 174 175 /* open the file, and deal with the various errors that can occur */ 176 177 if ((fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0777)) == -1) { 178 if (errno == ENOSPC) { 179 if (!already_whined) { 180 printf 181 ("reiser-2021A: out of disk (or inodes) space, will keep trying\n"); 182 already_whined = 1; /* we continue other file creation in out of 183 space conditions */ 184 } 185 return; 186 } 187 /* it is sometimes useful to be able to run this program more than once 188 inside the same directory, and that means skipping over filenames that 189 already exist. Thus we ignore EEXIST, and pay attention to all 190 else. */ 191 if (errno == EEXIST) { /* just skip existing file */ 192 return; 193 } 194 perror("open"); 195 exit(errno); 196 } 197 /* write to the file until it is the right size, handling the various error 198 conditions appropriately */ 199 200 while (size > 0) { 201 size -= (error = 202 write(fd, write_buffer, 203 (size < 204 write_buffer_size - 205 1) ? size : (write_buffer_size - 1))); 206 if (error == -1) { 207 if (errno == ENOSPC) { 208 if (!already_whined) { 209 printf 210 ("reiser-2022: out of disk space, will keep trying\n"); 211 already_whined = 1; 212 } 213 close(fd); 214 return; 215 } 216 perror("write() failed"); 217 exit(errno); 218 } 219 } 220 221 /* close the file */ 222 if (close(fd)) { 223 perror("close() failed"); 224 exit(errno); 225 } 226 } 227 228 /* print the statistics on how many files were created of what size */ 229 230 void print_stats() 231 { 232 if (!stats) 233 return; 234 235 printf("\n"); 236 printf("File stats: Units are decimal (1k = 1000)\n"); 237 printf("files 0-100 : %i\n", fsz_0_100); 238 printf("files 100-1K : %i\n", fsz_100_1k); 239 printf("files 1K-10K : %i\n", fsz_1k_10k); 240 printf("files 10K-100K : %i\n", fsz_10k_100k); 241 printf("files 100K-1M : %i\n", fsz_100k_1m); 242 printf("files 1M-10M : %i\n", fsz_1m_10m); 243 printf("files 10M-larger : %i\n", fsz_10m_larger); 244 printf("total bytes written : %lu\n", byte_total); 245 246 } 247 248 /* predict the number of files that will be created before max_bytes total 249 length of files is reached */ 250 long determine_nr_of_files(int median_file_size, double max_file_size, 251 long bytes_to_consume) 252 { 253 long nr_of_files = 0, byte_total = 0; 254 255 /* the next line is not necessary as 1 is the default, it is just cautious 256 coding */ 257 srand(1); 258 while (byte_total < bytes_to_consume) { 259 byte_total += determine_size(median_file_size, max_file_size); 260 nr_of_files++; 261 } 262 /* reset the random number generator so that when we determine_size() of the 263 files later they will be created with the same "random" sequence used in 264 this calculation */ 265 srand(1); 266 #ifdef DEBUG 267 printf("number of files is %d\n", (int)nr_of_files); 268 #endif /* DEBUG */ 269 fflush(NULL); 270 return nr_of_files; 271 } 272 273 /* fill the current working directory with nr_files_this_directory number of 274 files*/ 275 276 void fill_this_directory(long nr_files_this_directory, long median_file_size, 277 long maximum_size) 278 { 279 long size; 280 281 #ifdef DEBUG 282 printf("filling with %lu files, ", nr_files_this_directory); 283 #endif 284 while (nr_files_this_directory--) { 285 size = determine_size(median_file_size, maximum_size); 286 byte_total += size; 287 make_file(size); 288 } 289 } 290 291 /* this will unfortunately handle out of disk space by forever trying */ 292 /* What we should do in out of space situaltion ? I think we must skip this 293 directory and continue files/dirs creation process. Error value (!= 0) 294 indicates that we can't go to this directory. -zam */ 295 int make_directory(char *dirname) 296 { 297 static long this_directory_number = 0; 298 299 strcpy(tdir, path); 300 strcat(tdir, "/"); 301 strcat(tdir, dirname); 302 303 if (mkdir(tdir, 0755) == -1) { 304 if (errno == ENOSPC) { 305 if (!already_whined) { 306 printf("reiser-2021: out of disk space, "); 307 already_whined = 1; 308 } 309 return errno; 310 } 311 /* it is sometimes useful to be able to run this program more than once 312 inside the same directory, and that means skipping over filenames that 313 already exist. Thus we ignore EEXIST, and pay attention to all else. */ 314 if (errno != EEXIST) { 315 perror("mkdir"); 316 exit(errno); 317 } 318 } 319 sprintf(dirname, "d%lu", this_directory_number++); 320 strcpy(tdir, path); 321 strcat(tdir, "/"); 322 strcat(tdir, dirname); 323 324 return 0; 325 } 326 327 /* assumes we are already chdir'd into a directory that the subtree is rooted 328 at. Fills the directory with files and subdirectories, cd's into those 329 subdirectories, and recurses upon itself */ 330 331 void do_subtree( 332 /* the start and end of the portion of the directory sizes 333 array which corresponds to the sizes of the directories 334 composing this subtree */ 335 /* sizes_end minus sizes_start is equal to the number of 336 directories in this subtree */ 337 long *sizes_start, long *sizes_end, 338 long median_file_size, long maximum_file_size, 339 long median_dir_branching, long max_dir_branching) 340 { 341 long *p; 342 long *sub_start; 343 long *sub_end; 344 int index_subdirectory_to_add_directory_to; 345 long *dirs_in_subtrees; 346 char *subtree_name; 347 long *sizes_index = sizes_start; 348 char subtree_name_array[128]; 349 long this_directory_branching; 350 static long this_directorys_number; 351 352 subtree_name = subtree_name_array; 353 /* fill this directory with its number of files */ 354 fill_this_directory(*sizes_index, median_file_size, maximum_file_size); 355 sizes_index++; 356 /* ok, now randomly assign directories (and their number of files) among the 357 subdirectories that will be created if at least one directory is assigned 358 to it */ 359 360 /* this will cause the random number sequence to not match the one used in 361 determine_nr_files() I need to accumulate my values in an array 362 beforehand. I'll code that later. */ 363 /* worry about whether 0 or 1 is a problem value */ 364 this_directory_branching = 365 determine_size(median_dir_branching, max_dir_branching) + 1; 366 367 /* create an array holding the number of directories assigned to each 368 potential subdirectory */ 369 dirs_in_subtrees = calloc(this_directory_branching, sizeof(long)); 370 while (sizes_index <= sizes_end) { 371 index_subdirectory_to_add_directory_to = 372 (rand() % this_directory_branching); 373 (* 374 (dirs_in_subtrees + index_subdirectory_to_add_directory_to))++; 375 sizes_index++; 376 } 377 /* the +1 is for the fill_directory() we did above */ 378 sizes_index = sizes_start + 1; 379 380 /* go through each potential subdirectory, and if at least one directory has 381 been assigned to it, create it and recurse */ 382 for (p = dirs_in_subtrees; 383 p < (dirs_in_subtrees + this_directory_branching); p++) { 384 if (*p) { 385 int nocd; 386 sprintf(subtree_name, "d%lu", this_directorys_number++); 387 nocd = make_directory(subtree_name); 388 /* if make_dir.. may fails (in out of space situation), we continue 389 creation process in same dir */ 390 if (!nocd) 391 chngdir(subtree_name); 392 sub_start = sizes_index; 393 /* the minus one is because *p is the number of elements and arrays start at 0 */ 394 sub_end = (sizes_index + (*p - 1)); 395 396 #ifdef DEBUG 397 /* comment this back in if the array logic has you going cross-eyed */ 398 /* printf ("sizes_start is %p, sizes_index is %p, sizes_index+p is %p, sizes_end is %p\n", sizes_start, sub_start, sub_end, sizes_end); */ 399 #endif 400 do_subtree(sub_start, sub_end, median_file_size, 401 maximum_file_size, median_dir_branching, 402 max_dir_branching); 403 if (!nocd) 404 chngdir(".."); 405 } 406 sizes_index += *p; 407 } 408 } 409 410 /* We have already determined that nr_files can fit in bytes_to_consume space. 411 Fill the sizes array with the number of files to be in each directory, and 412 then call do_subtree to fill the tree with files and directories. */ 413 414 void make_fractal_tree(long median_file_size, long maximum_file_size, 415 long median_dir_nr_files, long max_dir_nr_files, 416 long median_dir_branching, long max_dir_branching, 417 long nr_files) 418 { 419 long *sizes_start; 420 long *sizes_end; 421 long *sizes_index; 422 long remaining_files = nr_files; 423 424 /* collect together array of directory sizes for whole filesystem. This 425 cannot easily be done recursively without distorting the directory sizes 426 and making deeper directories smaller. Send me the code if you 427 disagree.:-) */ 428 /* we almost certainly don't need this much space, but so what.... */ 429 sizes_index = sizes_start = malloc(nr_files * sizeof(long)); 430 for (; remaining_files > 0;) { 431 *sizes_index = 432 determine_size(median_dir_nr_files, max_dir_nr_files); 433 // we alloc space for nr_files, so we should avoid 434 // number of files in directory = 0 -grev. 435 if (*sizes_index == 0) 436 *sizes_index = 1; 437 *sizes_index = 438 (*sizes_index < 439 remaining_files) ? *sizes_index : remaining_files; 440 441 #ifdef DEBUG 442 printf("*sizes_index == %lu, ", *sizes_index); 443 #endif 444 remaining_files -= *sizes_index; 445 sizes_index++; 446 } 447 /* don't decrement below sizes_start if nr_files is 0 */ 448 sizes_end = (sizes_index-- > sizes_start) ? sizes_index : sizes_start; 449 450 sizes_index = sizes_start; 451 srand(1); 452 do_subtree(sizes_start, sizes_end, median_file_size, maximum_file_size, 453 median_dir_branching, max_dir_branching); 454 455 } 456 457 int main(int argc, char *argv[]) 458 { 459 /* initialized from argv[] */ 460 long median_file_size, 461 median_dir_branching, 462 median_dir_nr_files, 463 max_dir_nr_files, max_dir_branching, max_file_size; 464 long nr_of_files = 0; /* files to be created */ 465 466 if (argc != 11) { 467 print_usage(); 468 exit(1); 469 } 470 471 write_buffer_size = atoi(argv[8]); 472 write_buffer = malloc(write_buffer_size); 473 memset(write_buffer, 'a', write_buffer_size); 474 475 /* the number of bytes that we desire this tree to consume. It will actually 476 consume more, because the last file will overshoot by a random amount, and 477 because the directories and metadata will consume space. */ 478 bytes_to_consume = atol(argv[1]); 479 max_file_size = atol(argv[3]); 480 median_file_size = atol(argv[2]); 481 /* Figure out how many random files will fit into bytes_to_consume bytes. We 482 depend on resetting rand() to get the same result later. */ 483 nr_of_files = 484 determine_nr_of_files(median_file_size, max_file_size, 485 bytes_to_consume); 486 487 strcpy(path, argv[9]); 488 mkdir(path, 0755); 489 stats = atol(argv[10]); 490 median_dir_branching = atol(argv[6]); 491 max_dir_branching = atol(argv[7]); 492 median_dir_nr_files = atol(argv[4]); 493 max_dir_nr_files = atol(argv[5]); 494 make_fractal_tree(median_file_size, max_file_size, median_dir_nr_files, 495 max_dir_nr_files, median_dir_branching, 496 max_dir_branching, nr_of_files); 497 print_stats(); 498 if (stats) 499 printf("\nreiser_fract_tree finished\n"); 500 501 return 0; 502 } 503