1 /* 2 * The main pounder process controller and scheduler program. 3 * Author: Darrick Wong <djwong (at) us.ibm.com> 4 */ 5 6 /* 7 * Copyright (C) 2003-2006 IBM 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License as 11 * published by the Free Software Foundation; either version 2 of the 12 * License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, but 15 * WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 22 * 02111-1307, USA. 23 */ 24 25 #include <errno.h> 26 #include <signal.h> 27 #include <sys/wait.h> 28 #include <unistd.h> 29 #include <fcntl.h> 30 #include <string.h> 31 #include <stdlib.h> 32 #include <ctype.h> 33 #include <stdarg.h> 34 #include <sys/time.h> 35 #include <time.h> 36 #include <stdio.h> 37 #include <dirent.h> 38 #include <sys/stat.h> 39 40 #include "proclist.h" 41 #include "debug.h" 42 43 // List of subprocesses to wait upon 44 struct proclist_t wait_ons = { NULL }; 45 struct proclist_t daemons = { NULL }; 46 47 static int is_leader = 0; 48 static char *pidfile = ""; 49 50 static inline int is_executable(const char *fname); 51 static inline int is_directory(const char *fname); 52 static inline int test_filter(const struct dirent *p); 53 static inline int test_sort(const struct dirent **a, const struct dirent **b); 54 static int wait_for_pids(void); 55 static void wait_for_daemons(void); 56 static void note_process(pid_t pid, char *name); 57 static void note_daemon(pid_t pid, char *name); 58 static void kill_tests(void); 59 static void kill_daemons(void); 60 static int process_dir(const char *fname); 61 static pid_t spawn_test(char *fname); 62 static void note_child(pid_t pid, char *fname, char type); 63 static int child_finished(const char *name, int stat); 64 static char *progname; 65 66 #define TEST_PATH_LEN 512 67 #define TEST_FORK_WAIT 100 68 69 /** 70 * Kill everything upon ^C. 71 */ 72 static void jump_out(int signum) 73 { 74 pounder_fprintf(stdout, "Control-C received; aborting!\n"); 75 //unlink("pounder_pgrp"); 76 kill_tests(); 77 kill_daemons(); 78 if (is_leader) { 79 unlink(pidfile); 80 } 81 exit(0); 82 } 83 84 /** 85 * Kills tests launched from within. 86 */ 87 static void kill_tests(void) 88 { 89 struct proclist_item_t *curr; 90 91 curr = wait_ons.head; 92 while (curr != NULL) { 93 kill(-curr->pid, SIGTERM); 94 curr = curr->next; 95 } 96 } 97 98 /** 99 * Kills daemons launched from within. 100 */ 101 static void kill_daemons(void) 102 { 103 struct proclist_item_t *curr; 104 105 curr = daemons.head; 106 while (curr != NULL) { 107 kill(-curr->pid, SIGTERM); 108 curr = curr->next; 109 } 110 } 111 112 /** 113 * Record the pounder leader's PID in a file. 114 */ 115 static void record_pid(void) 116 { 117 FILE *fp; 118 119 pidfile = getenv("POUNDER_PIDFILE"); 120 if (pidfile == NULL) { 121 pidfile = "pounder.pid"; 122 } 123 124 fp = fopen(pidfile, "w"); 125 if (fp == NULL) { 126 perror(pidfile); 127 } 128 fprintf(fp, "%d", getpid()); 129 fclose(fp); 130 } 131 132 /** 133 * Main program. Returns 1 if all programs run successfully, 0 if 134 * something failed and -1 if there was an error running programs. 135 */ 136 int main(int argc, char *argv[]) 137 { 138 int retcode; 139 struct sigaction zig; 140 pid_t pid; 141 char *c; 142 143 /* Check parameters */ 144 if (argc < 2) { 145 fprintf(stderr, "Usage: %s test_prog\n", argv[0]); 146 return 1; 147 } 148 149 if (argc > 2 && strcmp(argv[2], "--leader") == 0) { 150 pounder_fprintf(stdout, 151 "Logging this test output to %s/POUNDERLOG.\n", 152 getenv("POUNDER_LOGDIR")); 153 is_leader = 1; 154 record_pid(); 155 } 156 157 progname = argv[0]; 158 159 /* Set up signals */ 160 memset(&zig, 0x00, sizeof(zig)); 161 zig.sa_handler = jump_out; 162 sigaction(SIGHUP, &zig, NULL); 163 sigaction(SIGINT, &zig, NULL); 164 sigaction(SIGTERM, &zig, NULL); 165 166 if (is_directory(argv[1])) { 167 retcode = process_dir(argv[1]); 168 } else { 169 if (is_executable(argv[1])) { 170 c = rindex(argv[1], '/'); 171 c++; 172 173 // Start the test 174 pid = spawn_test(argv[1]); 175 if (pid < 0) { 176 perror("fork"); 177 retcode = -1; 178 goto out; 179 } 180 // Track the test 181 note_process(pid, argv[1]); 182 if (wait_for_pids() == 0) { 183 retcode = 1; 184 } else { 185 retcode = 0; 186 } 187 } else { 188 pounder_fprintf(stderr, 189 "%s: Not a directory or a test.\n", 190 argv[1]); 191 retcode = -1; 192 } 193 } 194 195 out: 196 kill_daemons(); 197 wait_for_daemons(); 198 if (is_leader) { 199 if (retcode == 0) { 200 pounder_fprintf(stdout, "%s: %s.\n", argv[1], pass_msg); 201 } else if (retcode < 0 || retcode == 255) { 202 pounder_fprintf(stdout, "%s: %s with code %d.\n", 203 argv[1], abort_msg, retcode); 204 } else { 205 pounder_fprintf(stdout, "%s: %s with code %d.\n", 206 argv[1], fail_msg, retcode); 207 } 208 unlink(pidfile); 209 } 210 exit(retcode); 211 } 212 213 /** 214 * Helper function to determine if a file is executable. 215 * Returns 1 if yes, 0 if no and -1 if error. 216 */ 217 static inline int is_executable(const char *fname) 218 { 219 struct stat tmp; 220 221 if (stat(fname, &tmp) < 0) { 222 return -1; 223 } 224 225 if (geteuid() == 0) { 226 return 1; 227 } else if (geteuid() == tmp.st_uid) { 228 return tmp.st_mode & S_IXUSR; 229 } else if (getegid() == tmp.st_gid) { 230 return tmp.st_mode & S_IXGRP; 231 } else { 232 return tmp.st_mode & S_IXOTH; 233 } 234 } 235 236 /** 237 * Helper function to determine if a file is a directory. 238 * Returns 1 if yes, 0 if no and -1 if error. 239 */ 240 static inline int is_directory(const char *fname) 241 { 242 struct stat tmp; 243 244 if (stat(fname, &tmp) < 0) { 245 return 0; 246 } 247 248 return S_ISDIR(tmp.st_mode); 249 } 250 251 /** 252 * Returns 1 if the directory entry's filename fits the test name pattern. 253 */ 254 static inline int test_filter(const struct dirent *p) 255 { 256 return ((p->d_name[0] == 'T' || p->d_name[0] == 'D') 257 && isdigit(p->d_name[1]) && isdigit(p->d_name[2])); 258 } 259 260 /** 261 * Simple routine to compare two tests names such that lower number/name pairs 262 * are considered "lesser" values. 263 */ 264 //static inline int test_sort(const struct dirent **a, const struct dirent **b) { 265 static inline int test_sort(const struct dirent **a, const struct dirent **b) 266 { 267 return strcmp(&(*b)->d_name[1], &(*a)->d_name[1]); 268 } 269 270 /** 271 * Takes the wait() status integer and prints a log message. 272 * Returns 1 if there was a failure. 273 */ 274 static int child_finished(const char *name, int stat) 275 { 276 int x; 277 // did we sig-exit? 278 if (WIFSIGNALED(stat)) { 279 pounder_fprintf(stdout, "%s: %s on signal %d.\n", 280 name, fail_msg, WTERMSIG(stat)); 281 return 1; 282 } else { 283 x = WEXITSTATUS(stat); 284 if (x == 0) { 285 pounder_fprintf(stdout, "%s: %s.\n", name, pass_msg); 286 return 0; 287 } else if (x < 0 || x == 255) { 288 pounder_fprintf(stdout, "%s: %s with code %d.\n", 289 name, abort_msg, x); 290 return 1; 291 // FIXME: add test to blacklist 292 } else { 293 pounder_fprintf(stdout, "%s: %s with code %d.\n", 294 name, fail_msg, x); 295 return 1; 296 } 297 } 298 } 299 300 /** 301 * Wait for some number of PIDs. If any of them return nonzero, we 302 * assume that there was some kind of failure and return 0. Otherwise, 303 * we return 1 to indicate success. 304 */ 305 static int wait_for_pids(void) 306 { 307 struct proclist_item_t *curr; 308 int i, stat, res, nprocs; 309 pid_t pid; 310 311 res = 1; 312 313 // figure out how many times we have to wait... 314 curr = wait_ons.head; 315 nprocs = 0; 316 while (curr != NULL) { 317 nprocs++; 318 curr = curr->next; 319 } 320 321 // now wait for children. 322 for (i = 0; i < nprocs;) { 323 pid = wait(&stat); 324 325 if (pid < 0) { 326 perror("wait"); 327 return 0; 328 } 329 // go find the child 330 curr = wait_ons.head; 331 while (curr != NULL) { 332 if (curr->pid == pid) { 333 res = 334 (child_finished(curr->name, stat) ? 0 : 335 res); 336 337 // one less pid to wait for 338 i++; 339 340 // stop observing 341 remove_from_proclist(&wait_ons, curr); 342 free(curr->name); 343 free(curr); 344 break; 345 } 346 curr = curr->next; 347 } 348 349 curr = daemons.head; 350 while (curr != NULL) { 351 if (curr->pid == pid) { 352 child_finished(curr->name, stat); 353 remove_from_proclist(&daemons, curr); 354 free(curr->name); 355 free(curr); 356 break; 357 } 358 curr = curr->next; 359 } 360 } 361 362 return res; 363 } 364 365 /** 366 * Wait for daemons to finish. This function does NOT wait for wait_ons. 367 */ 368 static void wait_for_daemons(void) 369 { 370 struct proclist_item_t *curr; 371 int i, stat, res, nprocs; 372 pid_t pid; 373 374 res = 1; 375 376 // figure out how many times we have to wait... 377 curr = daemons.head; 378 nprocs = 0; 379 while (curr != NULL) { 380 nprocs++; 381 curr = curr->next; 382 } 383 384 // now wait for daemons. 385 for (i = 0; i < nprocs;) { 386 pid = wait(&stat); 387 388 if (pid < 0) { 389 perror("wait"); 390 if (errno == ECHILD) { 391 return; 392 } 393 } 394 395 curr = daemons.head; 396 while (curr != NULL) { 397 if (curr->pid == pid) { 398 child_finished(curr->name, stat); 399 i++; 400 remove_from_proclist(&daemons, curr); 401 free(curr->name); 402 free(curr); 403 break; 404 } 405 curr = curr->next; 406 } 407 } 408 } 409 410 /** 411 * Creates a record of processes that we want to watch for. 412 */ 413 static void note_process(pid_t pid, char *name) 414 { 415 struct proclist_item_t *it; 416 417 it = calloc(1, sizeof(struct proclist_item_t)); 418 if (it == NULL) { 419 perror("malloc proclist_item_t"); 420 // XXX: Maybe we should just waitpid? 421 return; 422 } 423 it->pid = pid; 424 it->name = calloc(strlen(name) + 1, sizeof(char)); 425 if (it->name == NULL) { 426 perror("malloc procitem name"); 427 // XXX: Maybe we should just waitpid? 428 return; 429 } 430 strcpy(it->name, name); 431 432 add_to_proclist(&wait_ons, it); 433 } 434 435 /** 436 * Creates a record of daemons that should be killed on exit. 437 */ 438 static void note_daemon(pid_t pid, char *name) 439 { 440 struct proclist_item_t *it; 441 442 it = calloc(1, sizeof(struct proclist_item_t)); 443 if (it == NULL) { 444 perror("malloc proclist_item_t"); 445 // XXX: what do we do here? 446 return; 447 } 448 it->pid = pid; 449 it->name = calloc(strlen(name) + 1, sizeof(char)); 450 if (it->name == NULL) { 451 perror("malloc procitem name"); 452 // XXX: what do we do here? 453 return; 454 } 455 strcpy(it->name, name); 456 457 add_to_proclist(&daemons, it); 458 } 459 460 /** 461 * Starts a test, with the stdin/out/err fd's redirected to logs. 462 * The 'fname' parameter should be a relative path from $POUNDER_HOME. 463 */ 464 static pid_t spawn_test(char *fname) 465 { 466 pid_t pid; 467 int fd, tmp; 468 char buf[TEST_PATH_LEN], buf2[TEST_PATH_LEN]; 469 char *last_slash; 470 471 pid = fork(); 472 if (pid == 0) { 473 if (setpgrp() < 0) { 474 perror("setpgid"); 475 } 476 477 pounder_fprintf(stdout, "%s: %s test.\n", fname, start_msg); 478 479 // reroute stdin 480 fd = open("/dev/null", O_RDWR); 481 if (fd < 0) { 482 perror("/dev/null"); 483 exit(-1); 484 } 485 close(0); 486 tmp = dup2(fd, 0); 487 if (tmp < 0) { 488 perror("dup(/dev/null)"); 489 exit(-1); 490 } 491 close(fd); 492 493 // generate log name-- '/' -> '-'. 494 snprintf(buf2, TEST_PATH_LEN, "%s|%s", 495 getenv("POUNDER_LOGDIR"), fname); 496 497 fd = strlen(buf2); 498 for (tmp = (index(buf2, '|') - buf2); tmp < fd; tmp++) { 499 if (buf2[tmp] == '/') { 500 buf2[tmp] = '-'; 501 } else if (buf2[tmp] == '|') { 502 buf2[tmp] = '/'; 503 } 504 } 505 506 // make it so that we have a way to get back to the 507 // original console. 508 tmp = dup2(1, 3); 509 if (tmp < 0) { 510 perror("dup(stdout, 3)"); 511 exit(-1); 512 } 513 // reroute stdout/stderr 514 fd = open(buf2, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 515 S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH); 516 if (fd < 0) { 517 perror(buf2); 518 exit(-1); 519 } 520 close(1); 521 tmp = dup2(fd, 1); 522 if (tmp < 0) { 523 perror("dup(log, 1)"); 524 exit(-1); 525 } 526 close(2); 527 tmp = dup2(fd, 2); 528 if (tmp < 0) { 529 perror("dup(log, 2)"); 530 exit(-1); 531 } 532 close(fd); 533 534 // let us construct the absolute pathname of the test. 535 // first find the current directory 536 if (getcwd(buf, TEST_PATH_LEN) == NULL) { 537 perror("getcwd"); 538 exit(-1); 539 } 540 // then splice cwd + fname 541 snprintf(buf2, TEST_PATH_LEN, "%s/%s", buf, fname); 542 543 // find the location of the last slash 544 last_slash = rindex(buf2, '/'); 545 546 if (last_slash != NULL) { 547 // copy the filename part into a new buffer 548 snprintf(buf, TEST_PATH_LEN, "./%s", last_slash + 1); 549 550 // truncate at the last slash 551 *last_slash = 0; 552 553 // and chdir 554 if (chdir(buf2) != 0) { 555 perror(buf2); 556 exit(-1); 557 } 558 // reassign variables 559 fname = buf; 560 } 561 // spawn the process 562 execlp(fname, fname, NULL); 563 564 // If we get here, we can't run the test. 565 perror(fname); 566 exit(-1); 567 } 568 569 tmp = errno; 570 /* yield for a short while, so that the test has 571 * a little bit of time to run. 572 */ 573 usleep(TEST_FORK_WAIT); 574 errno = tmp; 575 576 return pid; 577 } 578 579 /** 580 * Adds a child process to either the running-test or running-daemon 581 * list. 582 */ 583 static void note_child(pid_t pid, char *fname, char type) 584 { 585 if (type == 'T') { 586 note_process(pid, fname); 587 } else if (type == 'D') { 588 note_daemon(pid, fname); 589 } else { 590 pounder_fprintf(stdout, 591 "Don't know what to do with child `%s' of type %c.\n", 592 fname, type); 593 } 594 } 595 596 /** 597 * Process a directory--for each entry in a directory, execute files or spawn 598 * a new copy of ourself on the new directory. Process execution is subject to 599 * these rules: 600 * 601 * - Test files that start with the same number '00foo' and '00bar' are allowed 602 * to run simultaneously. 603 * - Test files are run in order of number and then name. 604 * 605 * If a the fork fails, bit 1 of the return code is set. If a 606 * program runs but fails, bit 2 is set. 607 */ 608 static int process_dir(const char *fname) 609 { 610 struct dirent **namelist; 611 int i, result = 0; 612 char buf[TEST_PATH_LEN]; 613 int curr_level_num = -1; 614 int test_level_num; 615 pid_t pid; 616 int children_ok = 1; 617 618 pounder_fprintf(stdout, "%s: Entering directory.\n", fname); 619 620 i = scandir(fname, &namelist, test_filter, 621 (int (*)(const void *, const void *))test_sort); 622 if (i < 0) { 623 perror(fname); 624 return -1; 625 } 626 627 while (i--) { 628 /* determine level number */ 629 test_level_num = ((namelist[i]->d_name[1] - '0') * 10) 630 + (namelist[i]->d_name[2] - '0'); 631 632 if (curr_level_num == -1) { 633 curr_level_num = test_level_num; 634 } 635 636 if (curr_level_num != test_level_num) { 637 children_ok &= wait_for_pids(); 638 curr_level_num = test_level_num; 639 } 640 641 snprintf(buf, TEST_PATH_LEN, "%s/%s", fname, 642 namelist[i]->d_name); 643 if (is_directory(buf)) { 644 pid = fork(); 645 if (pid == 0) { 646 if (setpgrp() < 0) { 647 perror("setpgid"); 648 } 649 // spawn a new copy of ourself. 650 execl(progname, progname, buf, NULL); 651 652 perror(progname); 653 exit(-1); 654 } 655 } else { 656 pid = spawn_test(buf); 657 } 658 659 if (pid < 0) { 660 perror("fork"); 661 result |= 1; 662 free(namelist[i]); 663 continue; 664 } 665 666 note_child(pid, buf, namelist[i]->d_name[0]); 667 668 free(namelist[i]); 669 } 670 free(namelist); 671 672 /* wait for remaining runners */ 673 children_ok &= wait_for_pids(); 674 if (children_ok == 0) { 675 result |= 2; 676 } 677 678 pounder_fprintf(stdout, "%s: Leaving directory.\n", fname); 679 680 return result; 681 } 682