Home | History | Annotate | Download | only in pounder21
      1 /*
      2  * The main pounder process controller and scheduler program.
      3  * Author: Darrick Wong <djwong (at) us.ibm.com>
      4  */
      5 
      6 /*
      7  * Copyright (C) 2003-2006 IBM
      8  *
      9  * This program is free software; you can redistribute it and/or
     10  * modify it under the terms of the GNU General Public License as
     11  * published by the Free Software Foundation; either version 2 of the
     12  * License, or (at your option) any later version.
     13  *
     14  * This program is distributed in the hope that it will be useful, but
     15  * WITHOUT ANY WARRANTY; without even the implied warranty of
     16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17  * General Public License for more details.
     18  *
     19  * You should have received a copy of the GNU General Public License
     20  * along with this program; if not, write to the Free Software
     21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
     22  * 02111-1307, USA.
     23  */
     24 
     25 #include <errno.h>
     26 #include <signal.h>
     27 #include <sys/wait.h>
     28 #include <unistd.h>
     29 #include <fcntl.h>
     30 #include <string.h>
     31 #include <stdlib.h>
     32 #include <ctype.h>
     33 #include <stdarg.h>
     34 #include <sys/time.h>
     35 #include <time.h>
     36 #include <stdio.h>
     37 #include <dirent.h>
     38 #include <sys/stat.h>
     39 
     40 #include "proclist.h"
     41 #include "debug.h"
     42 
// List of subprocesses to wait upon
struct proclist_t wait_ons = { NULL };
// List of daemon subprocesses; they are killed and reaped when pounder exits
struct proclist_t daemons = { NULL };

// Set to 1 by main() when started with "--leader"; the leader owns the PID file
static int is_leader = 0;
// Path of the PID file; assigned by record_pid()
static char *pidfile = "";

// Forward declarations of the helpers defined later in this file
static inline int is_executable(const char *fname);
static inline int is_directory(const char *fname);
static inline int test_filter(const struct dirent *p);
static inline int test_sort(const struct dirent **a, const struct dirent **b);
static int wait_for_pids(void);
static void wait_for_daemons(void);
static void note_process(pid_t pid, char *name);
static void note_daemon(pid_t pid, char *name);
static void kill_tests(void);
static void kill_daemons(void);
static int process_dir(const char *fname);
static pid_t spawn_test(char *fname);
static void note_child(pid_t pid, char *fname, char type);
static int child_finished(const char *name, int stat);
// argv[0] as seen by main(); process_dir() re-executes it for subdirectories
static char *progname;

// Size of the buffers used to build test and log path names
#define TEST_PATH_LEN 512
// Microseconds the parent sleeps after forking a test (passed to usleep())
#define TEST_FORK_WAIT 100
     68 
     69 /**
     70  * Kill everything upon ^C.
     71  */
     72 static void jump_out(int signum)
     73 {
     74 	pounder_fprintf(stdout, "Control-C received; aborting!\n");
     75 	//unlink("pounder_pgrp");
     76 	kill_tests();
     77 	kill_daemons();
     78 	if (is_leader) {
     79 		unlink(pidfile);
     80 	}
     81 	exit(0);
     82 }
     83 
     84 /**
     85  * Kills tests launched from within.
     86  */
     87 static void kill_tests(void)
     88 {
     89 	struct proclist_item_t *curr;
     90 
     91 	curr = wait_ons.head;
     92 	while (curr != NULL) {
     93 		kill(-curr->pid, SIGTERM);
     94 		curr = curr->next;
     95 	}
     96 }
     97 
     98 /**
     99  * Kills daemons launched from within.
    100  */
    101 static void kill_daemons(void)
    102 {
    103 	struct proclist_item_t *curr;
    104 
    105 	curr = daemons.head;
    106 	while (curr != NULL) {
    107 		kill(-curr->pid, SIGTERM);
    108 		curr = curr->next;
    109 	}
    110 }
    111 
    112 /**
    113  * Record the pounder leader's PID in a file.
    114  */
    115 static void record_pid(void)
    116 {
    117 	FILE *fp;
    118 
    119 	pidfile = getenv("POUNDER_PIDFILE");
    120 	if (pidfile == NULL) {
    121 		pidfile = "pounder.pid";
    122 	}
    123 
    124 	fp = fopen(pidfile, "w");
    125 	if (fp == NULL) {
    126 		perror(pidfile);
    127 	}
    128 	fprintf(fp, "%d", getpid());
    129 	fclose(fp);
    130 }
    131 
    132 /**
    133  * Main program.  Returns 1 if all programs run successfully, 0 if
    134  * something failed and -1 if there was an error running programs.
    135  */
    136 int main(int argc, char *argv[])
    137 {
    138 	int retcode;
    139 	struct sigaction zig;
    140 	pid_t pid;
    141 	char *c;
    142 
    143 	/* Check parameters */
    144 	if (argc < 2) {
    145 		fprintf(stderr, "Usage: %s test_prog\n", argv[0]);
    146 		return 1;
    147 	}
    148 
    149 	if (argc > 2 && strcmp(argv[2], "--leader") == 0) {
    150 		pounder_fprintf(stdout,
    151 				"Logging this test output to %s/POUNDERLOG.\n",
    152 				getenv("POUNDER_LOGDIR"));
    153 		is_leader = 1;
    154 		record_pid();
    155 	}
    156 
    157 	progname = argv[0];
    158 
    159 	/* Set up signals */
    160 	memset(&zig, 0x00, sizeof(zig));
    161 	zig.sa_handler = jump_out;
    162 	sigaction(SIGHUP, &zig, NULL);
    163 	sigaction(SIGINT, &zig, NULL);
    164 	sigaction(SIGTERM, &zig, NULL);
    165 
    166 	if (is_directory(argv[1])) {
    167 		retcode = process_dir(argv[1]);
    168 	} else {
    169 		if (is_executable(argv[1])) {
    170 			c = rindex(argv[1], '/');
    171 			c++;
    172 
    173 			// Start the test
    174 			pid = spawn_test(argv[1]);
    175 			if (pid < 0) {
    176 				perror("fork");
    177 				retcode = -1;
    178 				goto out;
    179 			}
    180 			// Track the test
    181 			note_process(pid, argv[1]);
    182 			if (wait_for_pids() == 0) {
    183 				retcode = 1;
    184 			} else {
    185 				retcode = 0;
    186 			}
    187 		} else {
    188 			pounder_fprintf(stderr,
    189 					"%s: Not a directory or a test.\n",
    190 					argv[1]);
    191 			retcode = -1;
    192 		}
    193 	}
    194 
    195 out:
    196 	kill_daemons();
    197 	wait_for_daemons();
    198 	if (is_leader) {
    199 		if (retcode == 0) {
    200 			pounder_fprintf(stdout, "%s: %s.\n", argv[1], pass_msg);
    201 		} else if (retcode < 0 || retcode == 255) {
    202 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
    203 					argv[1], abort_msg, retcode);
    204 		} else {
    205 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
    206 					argv[1], fail_msg, retcode);
    207 		}
    208 		unlink(pidfile);
    209 	}
    210 	exit(retcode);
    211 }
    212 
    213 /**
    214  * Helper function to determine if a file is executable.
    215  * Returns 1 if yes, 0 if no and -1 if error.
    216  */
    217 static inline int is_executable(const char *fname)
    218 {
    219 	struct stat tmp;
    220 
    221 	if (stat(fname, &tmp) < 0) {
    222 		return -1;
    223 	}
    224 
    225 	if (geteuid() == 0) {
    226 		return 1;
    227 	} else if (geteuid() == tmp.st_uid) {
    228 		return tmp.st_mode & S_IXUSR;
    229 	} else if (getegid() == tmp.st_gid) {
    230 		return tmp.st_mode & S_IXGRP;
    231 	} else {
    232 		return tmp.st_mode & S_IXOTH;
    233 	}
    234 }
    235 
    236 /**
    237  * Helper function to determine if a file is a directory.
    238  * Returns 1 if yes, 0 if no and -1 if error.
    239  */
    240 static inline int is_directory(const char *fname)
    241 {
    242 	struct stat tmp;
    243 
    244 	if (stat(fname, &tmp) < 0) {
    245 		return 0;
    246 	}
    247 
    248 	return S_ISDIR(tmp.st_mode);
    249 }
    250 
    251 /**
    252  * Returns 1 if the directory entry's filename fits the test name pattern.
    253  */
    254 static inline int test_filter(const struct dirent *p)
    255 {
    256 	return ((p->d_name[0] == 'T' || p->d_name[0] == 'D')
    257 		&& isdigit(p->d_name[1]) && isdigit(p->d_name[2]));
    258 }
    259 
    260 /**
    261  * Simple routine to compare two tests names such that lower number/name pairs
    262  * are considered "lesser" values.
    263  */
    264 //static inline int test_sort(const struct dirent **a, const struct dirent **b) {
    265 static inline int test_sort(const struct dirent **a, const struct dirent **b)
    266 {
    267 	return strcmp(&(*b)->d_name[1], &(*a)->d_name[1]);
    268 }
    269 
    270 /**
    271  * Takes the wait() status integer and prints a log message.
    272  * Returns 1 if there was a failure.
    273  */
    274 static int child_finished(const char *name, int stat)
    275 {
    276 	int x;
    277 	// did we sig-exit?
    278 	if (WIFSIGNALED(stat)) {
    279 		pounder_fprintf(stdout, "%s: %s on signal %d.\n",
    280 				name, fail_msg, WTERMSIG(stat));
    281 		return 1;
    282 	} else {
    283 		x = WEXITSTATUS(stat);
    284 		if (x == 0) {
    285 			pounder_fprintf(stdout, "%s: %s.\n", name, pass_msg);
    286 			return 0;
    287 		} else if (x < 0 || x == 255) {
    288 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
    289 					name, abort_msg, x);
    290 			return 1;
    291 			// FIXME: add test to blacklist
    292 		} else {
    293 			pounder_fprintf(stdout, "%s: %s with code %d.\n",
    294 					name, fail_msg, x);
    295 			return 1;
    296 		}
    297 	}
    298 }
    299 
    300 /**
    301  * Wait for some number of PIDs.  If any of them return nonzero, we
    302  * assume that there was some kind of failure and return 0.  Otherwise,
    303  * we return 1 to indicate success.
    304  */
    305 static int wait_for_pids(void)
    306 {
    307 	struct proclist_item_t *curr;
    308 	int i, stat, res, nprocs;
    309 	pid_t pid;
    310 
    311 	res = 1;
    312 
    313 	// figure out how many times we have to wait...
    314 	curr = wait_ons.head;
    315 	nprocs = 0;
    316 	while (curr != NULL) {
    317 		nprocs++;
    318 		curr = curr->next;
    319 	}
    320 
    321 	// now wait for children.
    322 	for (i = 0; i < nprocs;) {
    323 		pid = wait(&stat);
    324 
    325 		if (pid < 0) {
    326 			perror("wait");
    327 			return 0;
    328 		}
    329 		// go find the child
    330 		curr = wait_ons.head;
    331 		while (curr != NULL) {
    332 			if (curr->pid == pid) {
    333 				res =
    334 				    (child_finished(curr->name, stat) ? 0 :
    335 				     res);
    336 
    337 				// one less pid to wait for
    338 				i++;
    339 
    340 				// stop observing
    341 				remove_from_proclist(&wait_ons, curr);
    342 				free(curr->name);
    343 				free(curr);
    344 				break;
    345 			}
    346 			curr = curr->next;
    347 		}
    348 
    349 		curr = daemons.head;
    350 		while (curr != NULL) {
    351 			if (curr->pid == pid) {
    352 				child_finished(curr->name, stat);
    353 				remove_from_proclist(&daemons, curr);
    354 				free(curr->name);
    355 				free(curr);
    356 				break;
    357 			}
    358 			curr = curr->next;
    359 		}
    360 	}
    361 
    362 	return res;
    363 }
    364 
    365 /**
    366  * Wait for daemons to finish.  This function does NOT wait for wait_ons.
    367  */
    368 static void wait_for_daemons(void)
    369 {
    370 	struct proclist_item_t *curr;
    371 	int i, stat, res, nprocs;
    372 	pid_t pid;
    373 
    374 	res = 1;
    375 
    376 	// figure out how many times we have to wait...
    377 	curr = daemons.head;
    378 	nprocs = 0;
    379 	while (curr != NULL) {
    380 		nprocs++;
    381 		curr = curr->next;
    382 	}
    383 
    384 	// now wait for daemons.
    385 	for (i = 0; i < nprocs;) {
    386 		pid = wait(&stat);
    387 
    388 		if (pid < 0) {
    389 			perror("wait");
    390 			if (errno == ECHILD) {
    391 				return;
    392 			}
    393 		}
    394 
    395 		curr = daemons.head;
    396 		while (curr != NULL) {
    397 			if (curr->pid == pid) {
    398 				child_finished(curr->name, stat);
    399 				i++;
    400 				remove_from_proclist(&daemons, curr);
    401 				free(curr->name);
    402 				free(curr);
    403 				break;
    404 			}
    405 			curr = curr->next;
    406 		}
    407 	}
    408 }
    409 
    410 /**
    411  * Creates a record of processes that we want to watch for.
    412  */
    413 static void note_process(pid_t pid, char *name)
    414 {
    415 	struct proclist_item_t *it;
    416 
    417 	it = calloc(1, sizeof(struct proclist_item_t));
    418 	if (it == NULL) {
    419 		perror("malloc proclist_item_t");
    420 		// XXX: Maybe we should just waitpid?
    421 		return;
    422 	}
    423 	it->pid = pid;
    424 	it->name = calloc(strlen(name) + 1, sizeof(char));
    425 	if (it->name == NULL) {
    426 		perror("malloc procitem name");
    427 		// XXX: Maybe we should just waitpid?
    428 		return;
    429 	}
    430 	strcpy(it->name, name);
    431 
    432 	add_to_proclist(&wait_ons, it);
    433 }
    434 
    435 /**
    436  * Creates a record of daemons that should be killed on exit.
    437  */
    438 static void note_daemon(pid_t pid, char *name)
    439 {
    440 	struct proclist_item_t *it;
    441 
    442 	it = calloc(1, sizeof(struct proclist_item_t));
    443 	if (it == NULL) {
    444 		perror("malloc proclist_item_t");
    445 		// XXX: what do we do here?
    446 		return;
    447 	}
    448 	it->pid = pid;
    449 	it->name = calloc(strlen(name) + 1, sizeof(char));
    450 	if (it->name == NULL) {
    451 		perror("malloc procitem name");
    452 		// XXX: what do we do here?
    453 		return;
    454 	}
    455 	strcpy(it->name, name);
    456 
    457 	add_to_proclist(&daemons, it);
    458 }
    459 
    460 /**
    461  * Starts a test, with the stdin/out/err fd's redirected to logs.
    462  * The 'fname' parameter should be a relative path from $POUNDER_HOME.
    463  */
    464 static pid_t spawn_test(char *fname)
    465 {
    466 	pid_t pid;
    467 	int fd, tmp;
    468 	char buf[TEST_PATH_LEN], buf2[TEST_PATH_LEN];
    469 	char *last_slash;
    470 
    471 	pid = fork();
    472 	if (pid == 0) {
    473 		if (setpgrp() < 0) {
    474 			perror("setpgid");
    475 		}
    476 
    477 		pounder_fprintf(stdout, "%s: %s test.\n", fname, start_msg);
    478 
    479 		// reroute stdin
    480 		fd = open("/dev/null", O_RDWR);
    481 		if (fd < 0) {
    482 			perror("/dev/null");
    483 			exit(-1);
    484 		}
    485 		close(0);
    486 		tmp = dup2(fd, 0);
    487 		if (tmp < 0) {
    488 			perror("dup(/dev/null)");
    489 			exit(-1);
    490 		}
    491 		close(fd);
    492 
    493 		// generate log name-- '/' -> '-'.
    494 		snprintf(buf2, TEST_PATH_LEN, "%s|%s",
    495 			 getenv("POUNDER_LOGDIR"), fname);
    496 
    497 		fd = strlen(buf2);
    498 		for (tmp = (index(buf2, '|') - buf2); tmp < fd; tmp++) {
    499 			if (buf2[tmp] == '/') {
    500 				buf2[tmp] = '-';
    501 			} else if (buf2[tmp] == '|') {
    502 				buf2[tmp] = '/';
    503 			}
    504 		}
    505 
    506 		// make it so that we have a way to get back to the
    507 		// original console.
    508 		tmp = dup2(1, 3);
    509 		if (tmp < 0) {
    510 			perror("dup(stdout, 3)");
    511 			exit(-1);
    512 		}
    513 		// reroute stdout/stderr
    514 		fd = open(buf2, O_RDWR | O_CREAT | O_TRUNC | O_SYNC,
    515 			  S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
    516 		if (fd < 0) {
    517 			perror(buf2);
    518 			exit(-1);
    519 		}
    520 		close(1);
    521 		tmp = dup2(fd, 1);
    522 		if (tmp < 0) {
    523 			perror("dup(log, 1)");
    524 			exit(-1);
    525 		}
    526 		close(2);
    527 		tmp = dup2(fd, 2);
    528 		if (tmp < 0) {
    529 			perror("dup(log, 2)");
    530 			exit(-1);
    531 		}
    532 		close(fd);
    533 
    534 		// let us construct the absolute pathname of the test.
    535 		// first find the current directory
    536 		if (getcwd(buf, TEST_PATH_LEN) == NULL) {
    537 			perror("getcwd");
    538 			exit(-1);
    539 		}
    540 		// then splice cwd + fname
    541 		snprintf(buf2, TEST_PATH_LEN, "%s/%s", buf, fname);
    542 
    543 		// find the location of the last slash
    544 		last_slash = rindex(buf2, '/');
    545 
    546 		if (last_slash != NULL) {
    547 			// copy the filename part into a new buffer
    548 			snprintf(buf, TEST_PATH_LEN, "./%s", last_slash + 1);
    549 
    550 			// truncate at the last slash
    551 			*last_slash = 0;
    552 
    553 			// and chdir
    554 			if (chdir(buf2) != 0) {
    555 				perror(buf2);
    556 				exit(-1);
    557 			}
    558 			// reassign variables
    559 			fname = buf;
    560 		}
    561 		// spawn the process
    562 		execlp(fname, fname, NULL);
    563 
    564 		// If we get here, we can't run the test.
    565 		perror(fname);
    566 		exit(-1);
    567 	}
    568 
    569 	tmp = errno;
    570 	/* yield for a short while, so that the test has
    571 	 * a little bit of time to run.
    572 	 */
    573 	usleep(TEST_FORK_WAIT);
    574 	errno = tmp;
    575 
    576 	return pid;
    577 }
    578 
    579 /**
    580  * Adds a child process to either the running-test or running-daemon
    581  * list.
    582  */
    583 static void note_child(pid_t pid, char *fname, char type)
    584 {
    585 	if (type == 'T') {
    586 		note_process(pid, fname);
    587 	} else if (type == 'D') {
    588 		note_daemon(pid, fname);
    589 	} else {
    590 		pounder_fprintf(stdout,
    591 				"Don't know what to do with child `%s' of type %c.\n",
    592 				fname, type);
    593 	}
    594 }
    595 
    596 /**
    597  * Process a directory--for each entry in a directory, execute files or spawn
    598  * a new copy of ourself on the new directory.  Process execution is subject to
    599  * these rules:
    600  *
    601  * - Test files that start with the same number '00foo' and '00bar' are allowed
    602  *   to run simultaneously.
    603  * - Test files are run in order of number and then name.
    604  *
    605  * If a the fork fails, bit 1 of the return code is set.  If a
    606  * program runs but fails, bit 2 is set.
    607  */
    608 static int process_dir(const char *fname)
    609 {
    610 	struct dirent **namelist;
    611 	int i, result = 0;
    612 	char buf[TEST_PATH_LEN];
    613 	int curr_level_num = -1;
    614 	int test_level_num;
    615 	pid_t pid;
    616 	int children_ok = 1;
    617 
    618 	pounder_fprintf(stdout, "%s: Entering directory.\n", fname);
    619 
    620 	i = scandir(fname, &namelist, test_filter,
    621 		    (int (*)(const void *, const void *))test_sort);
    622 	if (i < 0) {
    623 		perror(fname);
    624 		return -1;
    625 	}
    626 
    627 	while (i--) {
    628 		/* determine level number */
    629 		test_level_num = ((namelist[i]->d_name[1] - '0') * 10)
    630 		    + (namelist[i]->d_name[2] - '0');
    631 
    632 		if (curr_level_num == -1) {
    633 			curr_level_num = test_level_num;
    634 		}
    635 
    636 		if (curr_level_num != test_level_num) {
    637 			children_ok &= wait_for_pids();
    638 			curr_level_num = test_level_num;
    639 		}
    640 
    641 		snprintf(buf, TEST_PATH_LEN, "%s/%s", fname,
    642 			 namelist[i]->d_name);
    643 		if (is_directory(buf)) {
    644 			pid = fork();
    645 			if (pid == 0) {
    646 				if (setpgrp() < 0) {
    647 					perror("setpgid");
    648 				}
    649 				// spawn a new copy of ourself.
    650 				execl(progname, progname, buf, NULL);
    651 
    652 				perror(progname);
    653 				exit(-1);
    654 			}
    655 		} else {
    656 			pid = spawn_test(buf);
    657 		}
    658 
    659 		if (pid < 0) {
    660 			perror("fork");
    661 			result |= 1;
    662 			free(namelist[i]);
    663 			continue;
    664 		}
    665 
    666 		note_child(pid, buf, namelist[i]->d_name[0]);
    667 
    668 		free(namelist[i]);
    669 	}
    670 	free(namelist);
    671 
    672 	/* wait for remaining runners */
    673 	children_ok &= wait_for_pids();
    674 	if (children_ok == 0) {
    675 		result |= 2;
    676 	}
    677 
    678 	pounder_fprintf(stdout, "%s: Leaving directory.\n", fname);
    679 
    680 	return result;
    681 }
    682