1 /* 2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it would be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 11 * 12 * Further, this software is distributed without any warranty that it is 13 * free of the rightful claim of any third person regarding infringement 14 * or the like. Any license provided herein, whether implied or 15 * otherwise, applies only to this software file. Patent licenses, if 16 * any, provided herein do not apply to combinations of this program with 17 * other software, or any other product whatsoever. 18 * 19 * You should have received a copy of the GNU General Public License along 20 * with this program; if not, write the Free Software Foundation, Inc., 21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 22 * 23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, 24 * Mountain View, CA 94043, or: 25 * 26 * http://www.sgi.com 27 * 28 * For further information regarding this notice, see: 29 * 30 * http://oss.sgi.com/projects/GenInfo/NoticeExplan/ 31 */ 32 /* 33 * doio - a general purpose io initiator with system call and 34 * write logging. See doio.h for the structure which defines 35 * what doio requests should look like. 36 * 37 * Currently doio can handle read,write,reada,writea,ssread, 38 * sswrite, and many varieties of listio requests. 39 * For disk io, if the O_SSD flag is set doio will allocate 40 * the appropriate amount of ssd and do the transfer - thus, doio 41 * can handle all of the primitive types of file io. 42 * 43 * programming 44 * notes: 45 * ----------- 46 * messages should generally be printed using doio_fprintf(). 
47 * 48 */ 49 50 #include <stdio.h> 51 #include <errno.h> 52 #include <fcntl.h> 53 #include <stdlib.h> 54 #include <signal.h> 55 #include <string.h> 56 #include <ctype.h> 57 #include <unistd.h> 58 #include <time.h> 59 #include <stdarg.h> 60 #include <sys/stat.h> 61 #include <sys/param.h> 62 #include <sys/types.h> 63 #include <sys/sysmacros.h> 64 #ifdef CRAY 65 #include <sys/iosw.h> 66 #endif 67 #ifdef sgi 68 #include <aio.h> /* for aio_read,write */ 69 #include <inttypes.h> /* for uint64_t type */ 70 #include <siginfo.h> /* signal handlers & SA_SIGINFO */ 71 #endif 72 #ifndef CRAY 73 #include <sys/uio.h> /* for struct iovec (readv) */ 74 #include <sys/mman.h> /* for mmap(2) */ 75 #include <sys/ipc.h> /* for i/o buffer in shared memory */ 76 #include <sys/shm.h> /* for i/o buffer in shared memory */ 77 #endif 78 #include <sys/wait.h> 79 #ifdef CRAY 80 #include <sys/listio.h> 81 #include <sys/panic.h> 82 #endif 83 #include <sys/time.h> /* for delays */ 84 85 #include "doio.h" 86 #include "write_log.h" 87 #include "random_range.h" 88 #include "string_to_tokens.h" 89 #include "pattern.h" 90 91 #define NMEMALLOC 32 92 #define MEM_DATA 1 /* data space */ 93 #define MEM_SHMEM 2 /* System V shared memory */ 94 #define MEM_T3ESHMEM 3 /* T3E Shared Memory */ 95 #define MEM_MMAP 4 /* mmap(2) */ 96 97 #define MEMF_PRIVATE 0001 98 #define MEMF_AUTORESRV 0002 99 #define MEMF_LOCAL 0004 100 #define MEMF_SHARED 0010 101 102 #define MEMF_FIXADDR 0100 103 #define MEMF_ADDR 0200 104 #define MEMF_AUTOGROW 0400 105 #define MEMF_FILE 01000 /* regular file -- unlink on close */ 106 #define MEMF_MPIN 010000 /* use mpin(2) to lock pages in memory */ 107 108 struct memalloc { 109 int memtype; 110 int flags; 111 int nblks; 112 char *name; 113 void *space; /* memory address of allocated space */ 114 int fd; /* FD open for mmaping */ 115 int size; 116 } Memalloc[NMEMALLOC]; 117 118 /* 119 * Structure for maintaining open file test descriptors. Used by 120 * alloc_fd(). 
121 */ 122 123 struct fd_cache { 124 char c_file[MAX_FNAME_LENGTH + 1]; 125 int c_oflags; 126 int c_fd; 127 long c_rtc; 128 #ifdef sgi 129 int c_memalign; /* from F_DIOINFO */ 130 int c_miniosz; 131 int c_maxiosz; 132 #endif 133 #ifndef CRAY 134 void *c_memaddr; /* mmapped address */ 135 int c_memlen; /* length of above region */ 136 #endif 137 }; 138 139 /* 140 * Name-To-Value map 141 * Used to map cmdline arguments to values 142 */ 143 struct smap { 144 char *string; 145 int value; 146 }; 147 148 struct aio_info { 149 int busy; 150 int id; 151 int fd; 152 int strategy; 153 volatile int done; 154 #ifdef CRAY 155 struct iosw iosw; 156 #endif 157 #ifdef sgi 158 aiocb_t aiocb; 159 int aio_ret; /* from aio_return */ 160 int aio_errno; /* from aio_error */ 161 #endif 162 int sig; 163 int signalled; 164 struct sigaction osa; 165 }; 166 167 /* --------------------------------------------------------------------------- 168 * 169 * A new paradigm of doing the r/w system call where there is a "stub" 170 * function that builds the info for the system call, then does the system 171 * call; this is called by code that is common to all system calls and does 172 * the syscall return checking, async I/O wait, iosw check, etc. 
173 * 174 * Flags: 175 * WRITE, ASYNC, SSD/SDS, 176 * FILE_LOCK, WRITE_LOG, VERIFY_DATA, 177 */ 178 179 struct status { 180 int rval; /* syscall return */ 181 int err; /* errno */ 182 int *aioid; /* list of async I/O structures */ 183 }; 184 185 struct syscall_info { 186 char *sy_name; 187 int sy_type; 188 struct status *(*sy_syscall) (); 189 int (*sy_buffer) (); 190 char *(*sy_format) (); 191 int sy_flags; 192 int sy_bits; 193 }; 194 195 #define SY_WRITE 00001 196 #define SY_ASYNC 00010 197 #define SY_IOSW 00020 198 #define SY_SDS 00100 199 200 #ifndef O_SSD 201 #define O_SSD 0 /* so code compiles on a CRAY2 */ 202 #endif 203 204 #ifdef sgi 205 #define UINT64_T uint64_t 206 #else 207 #define UINT64_T unsigned long 208 #endif 209 210 #ifndef O_PARALLEL 211 #define O_PARALLEL 0 /* so O_PARALLEL may be used in expressions */ 212 #endif 213 214 #define PPID_CHECK_INTERVAL 5 /* check ppid every <-- iterations */ 215 #define MAX_AIO 256 /* maximum number of async I/O ops */ 216 #ifdef _CRAYMPP 217 #define MPP_BUMP 16 /* page un-alignment for MPP */ 218 #else 219 #define MPP_BUMP 0 220 #endif 221 222 #define SYSERR strerror(errno) 223 224 /* 225 * getopt() string of supported cmdline arguments. 226 */ 227 228 #define OPTS "aC:d:ehm:n:kr:w:vU:V:M:N:" 229 230 #define DEF_RELEASE_INTERVAL 0 231 232 /* 233 * Flags set in parse_cmdline() to indicate which options were selected 234 * on the cmdline. 
235 */ 236 237 int a_opt = 0; /* abort on data compare errors */ 238 int e_opt = 0; /* exec() after fork()'ing */ 239 int C_opt = 0; /* Data Check Type */ 240 int d_opt = 0; /* delay between operations */ 241 int k_opt = 0; /* lock file regions during writes */ 242 int m_opt = 0; /* generate periodic messages */ 243 int n_opt = 0; /* nprocs */ 244 int r_opt = 0; /* resource release interval */ 245 int w_opt = 0; /* file write log file */ 246 int v_opt = 0; /* verify writes if set */ 247 int U_opt = 0; /* upanic() on varios conditions */ 248 int V_opt = 0; /* over-ride default validation fd type */ 249 int M_opt = 0; /* data buffer allocation types */ 250 char TagName[40]; /* name of this doio (see Monster) */ 251 252 /* 253 * Misc globals initialized in parse_cmdline() 254 */ 255 256 char *Prog = NULL; /* set up in parse_cmdline() */ 257 int Upanic_Conditions; /* set by args to -U */ 258 int Release_Interval; /* arg to -r */ 259 int Nprocs; /* arg to -n */ 260 char *Write_Log; /* arg to -w */ 261 char *Infile; /* input file (defaults to stdin) */ 262 int *Children; /* pids of child procs */ 263 int Nchildren = 0; 264 int Nsiblings = 0; /* tfork'ed siblings */ 265 int Execd = 0; 266 int Message_Interval = 0; 267 int Npes = 0; /* non-zero if built as an mpp multi-pe app */ 268 int Vpe = -1; /* Virtual pe number if Npes >= 0 */ 269 int Reqno = 1; /* request # - used in some error messages */ 270 int Reqskipcnt = 0; /* count of I/O requests that are skipped */ 271 int Validation_Flags; 272 char *(*Data_Check) (); /* function to call for data checking */ 273 int (*Data_Fill) (); /* function to call for data filling */ 274 int Nmemalloc = 0; /* number of memory allocation strategies */ 275 int delayop = 0; /* delay between operations - type of delay */ 276 int delaytime = 0; /* delay between operations - how long */ 277 278 struct wlog_file Wlog; 279 280 int active_mmap_rw = 0; /* Indicates that mmapped I/O is occurring. 
*/ 281 /* Used by sigbus_action() in the child doio. */ 282 int havesigint = 0; 283 284 #define SKIP_REQ -2 /* skip I/O request */ 285 286 /* 287 * Global file descriptors 288 */ 289 290 int Wfd_Append; /* for appending to the write-log */ 291 int Wfd_Random; /* for overlaying write-log entries */ 292 293 #define FD_ALLOC_INCR 32 /* allocate this many fd_map structs */ 294 /* at a time */ 295 296 /* 297 * Globals for tracking Sds and Core usage 298 */ 299 300 char *Memptr; /* ptr to core buffer space */ 301 int Memsize; /* # bytes pointed to by Memptr */ 302 /* maintained by alloc_mem() */ 303 304 int Sdsptr; /* sds offset (always 0) */ 305 int Sdssize; /* # bytes of allocated sds space */ 306 /* Maintained by alloc_sds() */ 307 char Host[16]; 308 char Pattern[128]; 309 int Pattern_Length; 310 311 /* 312 * Signal handlers, and related globals 313 */ 314 315 char *syserrno(int err); 316 void doio(void); 317 void doio_delay(void); 318 char *format_oflags(int oflags); 319 char *format_strat(int strategy); 320 char *format_rw(struct io_req *ioreq, int fd, void *buffer, 321 int signo, char *pattern, void *iosw); 322 #ifdef CRAY 323 char *format_sds(struct io_req *ioreq, void *buffer, int sds char *pattern); 324 #endif /* CRAY */ 325 326 int do_read(struct io_req *req); 327 int do_write(struct io_req *req); 328 int lock_file_region(char *fname, int fd, int type, int start, int nbytes); 329 330 #ifdef CRAY 331 char *format_listio(struct io_req *ioreq, int lcmd, 332 struct listreq *list, int nent, int fd, char *pattern); 333 #endif /* CRAY */ 334 335 int do_listio(struct io_req *req); 336 337 #if defined(_CRAY1) || defined(CRAY) 338 int do_ssdio(struct io_req *req); 339 #endif /* defined(_CRAY1) || defined(CRAY) */ 340 341 char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd); 342 343 #ifdef CRAY 344 struct status *sy_listio(struct io_req *req, struct syscall_info *sysc, 345 int fd, char *addr); 346 int listio_mem(struct io_req *req, int offset, int 
fmstride, 347 int *min, int *max); 348 char *fmt_listio(struct io_req *req, struct syscall_info *sy, 349 int fd, char *addr); 350 #endif /* CRAY */ 351 352 #ifdef sgi 353 struct status *sy_pread(struct io_req *req, struct syscall_info *sysc, 354 int fd, char *addr); 355 struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc, 356 int fd, char *addr); 357 char *fmt_pread(struct io_req *req, struct syscall_info *sy, 358 int fd, char *addr); 359 #endif /* sgi */ 360 361 #ifndef CRAY 362 struct status *sy_readv(struct io_req *req, struct syscall_info *sysc, 363 int fd, char *addr); 364 struct status *sy_writev(struct io_req *req, struct syscall_info *sysc, 365 int fd, char *addr); 366 struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc, 367 int fd, char *addr, int rw); 368 char *fmt_readv(struct io_req *req, struct syscall_info *sy, 369 int fd, char *addr); 370 #endif /* !CRAY */ 371 372 #ifdef sgi 373 struct status *sy_aread(struct io_req *req, struct syscall_info *sysc, 374 int fd, char *addr); 375 struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc, 376 int fd, char *addr) 377 struct status *sy_arw(struct io_req *req, struct syscall_info *sysc, 378 int fd, char *addr, int rw); 379 char *fmt_aread(struct io_req *req, struct syscall_info *sy, 380 int fd, char *addr); 381 #endif /* sgi */ 382 383 #ifndef CRAY 384 struct status *sy_mmread(struct io_req *req, struct syscall_info *sysc, 385 int fd, char *addr); 386 struct status *sy_mmwrite(struct io_req *req, struct syscall_info *sysc, 387 int fd, char *addr); 388 struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc, 389 int fd, char *addr, int rw); 390 char *fmt_mmrw(struct io_req *req, struct syscall_info *sy, int fd, char *addr); 391 #endif /* !CRAY */ 392 393 int do_rw(struct io_req *req); 394 395 #ifdef sgi 396 int do_fcntl(struct io_req *req); 397 #endif /* sgi */ 398 399 #ifndef CRAY 400 int do_sync(struct io_req *req); 401 #endif /* !CRAY */ 402 
403 int doio_pat_fill(char *addr, int mem_needed, char *Pattern, 404 int Pattern_Length, int shift); 405 char *doio_pat_check(char *buf, int offset, int length, 406 char *pattern, int pattern_length, int patshift); 407 char *check_file(char *file, int offset, int length, char *pattern, 408 int pattern_length, int patshift, int fsa); 409 int doio_fprintf(FILE * stream, char *format, ...); 410 int alloc_mem(int nbytes); 411 412 #if defined(_CRAY1) || defined(CRAY) 413 int alloc_sds(int nbytes); 414 #endif /* defined(_CRAY1) || defined(CRAY) */ 415 416 int alloc_fd(char *file, int oflags); 417 struct fd_cache *alloc_fdcache(char *file, int oflags); 418 419 #ifdef sgi 420 void signal_info(int sig, siginfo_t * info, void *v); 421 void cleanup_handler(int sig, siginfo_t * info, void *v); 422 void die_handler(int sig, siginfo_t * info, void *v); 423 void sigbus_handler(int sig, siginfo_t * info, void *v); 424 #else /* !sgi */ 425 void cleanup_handler(int sig); 426 void die_handler(int sig); 427 428 #ifndef CRAY 429 void sigbus_handler(int sig); 430 #endif /* !CRAY */ 431 #endif /* sgi */ 432 433 void noop_handler(int sig); 434 void sigint_handler(int sig); 435 void aio_handler(int sig); 436 void dump_aio(void); 437 438 #ifdef sgi 439 void cb_handler(sigval_t val); 440 #endif /* sgi */ 441 442 struct aio_info *aio_slot(int aio_id); 443 int aio_register(int fd, int strategy, int sig); 444 int aio_unregister(int aio_id); 445 446 #ifndef __linux__ 447 int aio_wait(int aio_id); 448 #endif /* !__linux__ */ 449 450 char *hms(time_t t); 451 int aio_done(struct aio_info *ainfo); 452 void doio_upanic(int mask); 453 int parse_cmdline(int argc, char **argv, char *opts); 454 455 #ifndef CRAY 456 void parse_memalloc(char *arg); 457 void dump_memalloc(void); 458 #endif /* !CRAY */ 459 460 void parse_delay(char *arg); 461 int usage(FILE * stream); 462 void help(FILE * stream); 463 464 /* 465 * Upanic conditions, and a map from symbolics to values 466 */ 467 468 #define U_CORRUPTION 0001 
/* upanic on data corruption */ 469 #define U_IOSW 0002 /* upanic on bad iosw */ 470 #define U_RVAL 0004 /* upanic on bad rval */ 471 472 #define U_ALL (U_CORRUPTION | U_IOSW | U_RVAL) 473 474 struct smap Upanic_Args[] = { 475 {"corruption", U_CORRUPTION}, 476 {"iosw", U_IOSW}, 477 {"rval", U_RVAL}, 478 {"all", U_ALL}, 479 {NULL, 0} 480 }; 481 482 struct aio_info Aio_Info[MAX_AIO]; 483 484 /* -C data-fill/check type */ 485 #define C_DEFAULT 1 486 struct smap checkmap[] = { 487 {"default", C_DEFAULT}, 488 {NULL, 0}, 489 }; 490 491 /* -d option delay types */ 492 #define DELAY_SELECT 1 493 #define DELAY_SLEEP 2 494 #define DELAY_SGINAP 3 495 #define DELAY_ALARM 4 496 #define DELAY_ITIMER 5 /* POSIX timer */ 497 498 struct smap delaymap[] = { 499 {"select", DELAY_SELECT}, 500 {"sleep", DELAY_SLEEP}, 501 #ifdef sgi 502 {"sginap", DELAY_SGINAP}, 503 #endif 504 {"alarm", DELAY_ALARM}, 505 {NULL, 0}, 506 }; 507 508 /****** 509 * 510 * strerror() does similar actions. 511 512 char * 513 syserrno(int err) 514 { 515 static char sys_errno[10]; 516 sprintf(sys_errno, "%d", errno); 517 return(sys_errno); 518 } 519 520 ******/ 521 522 int main(int argc, char **argv) 523 { 524 int i, pid, stat, ex_stat; 525 #ifdef CRAY 526 sigset_t omask; 527 #elif defined(linux) 528 sigset_t omask, block_mask; 529 #else 530 int omask; 531 #endif 532 struct sigaction sa; 533 534 umask(0); /* force new file modes to known values */ 535 #if _CRAYMPP 536 Npes = sysconf(_SC_CRAY_NPES); /* must do this before parse_cmdline */ 537 Vpe = sysconf(_SC_CRAY_VPE); 538 #endif 539 540 TagName[0] = '\0'; 541 parse_cmdline(argc, argv, OPTS); 542 543 random_range_seed(getpid()); /* initialize random number generator */ 544 545 /* 546 * If this is a re-exec of doio, jump directly into the doio function. 547 */ 548 549 if (Execd) { 550 doio(); 551 exit(E_SETUP); 552 } 553 554 /* 555 * Stop on all but a few signals... 
556 */ 557 sigemptyset(&sa.sa_mask); 558 sa.sa_handler = sigint_handler; 559 sa.sa_flags = SA_RESETHAND; /* sigint is ignored after the */ 560 /* first time */ 561 for (i = 1; i <= NSIG; i++) { 562 switch (i) { 563 #ifdef SIGRECOVERY 564 case SIGRECOVERY: 565 break; 566 #endif 567 #ifdef SIGCKPT 568 case SIGCKPT: 569 #endif 570 #ifdef SIGRESTART 571 case SIGRESTART: 572 #endif 573 case SIGTSTP: 574 case SIGSTOP: 575 case SIGCONT: 576 case SIGCHLD: 577 case SIGBUS: 578 case SIGSEGV: 579 case SIGQUIT: 580 break; 581 default: 582 sigaction(i, &sa, NULL); 583 } 584 } 585 586 /* 587 * If we're logging write operations, make a dummy call to wlog_open 588 * to initialize the write history file. This call must be done in 589 * the parent, to ensure that the history file exists and/or has 590 * been truncated before any children attempt to open it, as the doio 591 * children are not allowed to truncate the file. 592 */ 593 594 if (w_opt) { 595 strcpy(Wlog.w_file, Write_Log); 596 597 if (wlog_open(&Wlog, 1, 0666) < 0) { 598 doio_fprintf(stderr, 599 "Could not create/truncate write log %s\n", 600 Write_Log); 601 exit(2); 602 } 603 604 wlog_close(&Wlog); 605 } 606 607 /* 608 * Malloc space for the children pid array. Initialize all entries 609 * to -1. 610 */ 611 612 Children = malloc(sizeof(int) * Nprocs); 613 for (i = 0; i < Nprocs; i++) { 614 Children[i] = -1; 615 } 616 617 sigemptyset(&block_mask); 618 sigaddset(&block_mask, SIGCHLD); 619 sigprocmask(SIG_BLOCK, &block_mask, &omask); 620 621 /* 622 * Fork Nprocs. This [parent] process is a watchdog, to notify the 623 * invoker of procs which exit abnormally, and to make sure that all 624 * child procs get cleaned up. If the -e option was used, we will also 625 * re-exec. This is mostly for unicos/mk on mpp's, to ensure that not 626 * all of the doio's don't end up in the same pe. 627 * 628 * Note - if Nprocs is 1, or this doio is a multi-pe app (Npes > 1), 629 * jump directly to doio(). 
multi-pe apps can't fork(), and there is 630 * no reason to fork() for 1 proc. 631 */ 632 633 if (Nprocs == 1 || Npes > 1) { 634 doio(); 635 exit(0); 636 } else { 637 for (i = 0; i < Nprocs; i++) { 638 if ((pid = fork()) == -1) { 639 doio_fprintf(stderr, 640 "(parent) Could not fork %d children: %s (%d)\n", 641 i + 1, SYSERR, errno); 642 exit(E_SETUP); 643 } 644 645 Children[Nchildren] = pid; 646 Nchildren++; 647 648 if (pid == 0) { 649 if (e_opt) { 650 char *exec_path; 651 652 exec_path = argv[0]; 653 argv[0] = malloc(strlen(exec_path) + 2); 654 sprintf(argv[0], "-%s", exec_path); 655 656 execvp(exec_path, argv); 657 doio_fprintf(stderr, 658 "(parent) Could not execvp %s: %s (%d)\n", 659 exec_path, SYSERR, errno); 660 exit(E_SETUP); 661 } else { 662 doio(); 663 exit(E_SETUP); 664 } 665 } 666 } 667 668 /* 669 * Parent spins on wait(), until all children exit. 670 */ 671 672 ex_stat = E_NORMAL; 673 674 while (Nprocs) { 675 if ((pid = wait(&stat)) == -1) { 676 if (errno == EINTR) 677 continue; 678 } 679 680 for (i = 0; i < Nchildren; i++) 681 if (Children[i] == pid) 682 Children[i] = -1; 683 684 Nprocs--; 685 686 if (WIFEXITED(stat)) { 687 switch (WEXITSTATUS(stat)) { 688 case E_NORMAL: 689 /* noop */ 690 break; 691 692 case E_INTERNAL: 693 doio_fprintf(stderr, 694 "(parent) pid %d exited because of an internal error\n", 695 pid); 696 ex_stat |= E_INTERNAL; 697 break; 698 699 case E_SETUP: 700 doio_fprintf(stderr, 701 "(parent) pid %d exited because of a setup error\n", 702 pid); 703 ex_stat |= E_SETUP; 704 break; 705 706 case E_COMPARE: 707 doio_fprintf(stderr, 708 "(parent) pid %d exited because of data compare errors\n", 709 pid); 710 711 ex_stat |= E_COMPARE; 712 713 if (a_opt) 714 kill(0, SIGINT); 715 716 break; 717 718 case E_USAGE: 719 doio_fprintf(stderr, 720 "(parent) pid %d exited because of a usage error\n", 721 pid); 722 723 ex_stat |= E_USAGE; 724 break; 725 726 default: 727 doio_fprintf(stderr, 728 "(parent) pid %d exited with unknown status %d\n", 729 
pid, WEXITSTATUS(stat)); 730 ex_stat |= E_INTERNAL; 731 break; 732 } 733 } else if (WIFSIGNALED(stat) 734 && WTERMSIG(stat) != SIGINT) { 735 doio_fprintf(stderr, 736 "(parent) pid %d terminated by signal %d\n", 737 pid, WTERMSIG(stat)); 738 739 ex_stat |= E_SIGNAL; 740 } 741 742 fflush(NULL); 743 } 744 } 745 746 exit(ex_stat); 747 748 } /* main */ 749 750 /* 751 * main doio function. Each doio child starts here, and never returns. 752 */ 753 754 void doio(void) 755 { 756 int rval, i, infd, nbytes; 757 char *cp; 758 struct io_req ioreq; 759 struct sigaction sa, def_action, ignore_action, exit_action; 760 #ifndef CRAY 761 struct sigaction sigbus_action; 762 #endif 763 764 Memsize = Sdssize = 0; 765 766 /* 767 * Initialize the Pattern - write-type syscalls will replace Pattern[1] 768 * with the pattern passed in the request. Make sure that 769 * strlen(Pattern) is not mod 16 so that out of order words will be 770 * detected. 771 */ 772 773 gethostname(Host, sizeof(Host)); 774 if ((cp = strchr(Host, '.')) != NULL) 775 *cp = '\0'; 776 777 Pattern_Length = sprintf(Pattern, "-:%d:%s:%s*", getpid(), Host, Prog); 778 779 if (!(Pattern_Length % 16)) { 780 Pattern_Length = sprintf(Pattern, "-:%d:%s:%s**", 781 getpid(), Host, Prog); 782 } 783 784 /* 785 * Open a couple of descriptors for the write-log file. One descriptor 786 * is for appending, one for random access. Write logging is done for 787 * file corruption detection. The program doio_check is capable of 788 * doing corruption detection based on a doio write-log. 
789 */ 790 791 if (w_opt) { 792 793 strcpy(Wlog.w_file, Write_Log); 794 795 if (wlog_open(&Wlog, 0, 0666) == -1) { 796 doio_fprintf(stderr, 797 "Could not open write log file (%s): wlog_open() failed\n", 798 Write_Log); 799 exit(E_SETUP); 800 } 801 } 802 803 /* 804 * Open the input stream - either a file or stdin 805 */ 806 807 if (Infile == NULL) { 808 infd = 0; 809 } else { 810 if ((infd = open(Infile, O_RDWR)) == -1) { 811 doio_fprintf(stderr, 812 "Could not open input file (%s): %s (%d)\n", 813 Infile, SYSERR, errno); 814 exit(E_SETUP); 815 } 816 } 817 818 /* 819 * Define a set of signals that should never be masked. Receipt of 820 * these signals generally indicates a programming error, and we want 821 * a corefile at the point of error. We put SIGQUIT in this list so 822 * that ^\ will force a user core dump. 823 * 824 * Note: the handler for these should be SIG_DFL, all of them 825 * produce a corefile as the default action. 826 */ 827 828 ignore_action.sa_handler = SIG_IGN; 829 ignore_action.sa_flags = 0; 830 sigemptyset(&ignore_action.sa_mask); 831 832 def_action.sa_handler = SIG_DFL; 833 def_action.sa_flags = 0; 834 sigemptyset(&def_action.sa_mask); 835 836 #ifdef sgi 837 exit_action.sa_sigaction = cleanup_handler; 838 exit_action.sa_flags = SA_SIGINFO; 839 sigemptyset(&exit_action.sa_mask); 840 841 sa.sa_sigaction = die_handler; 842 sa.sa_flags = SA_SIGINFO; 843 sigemptyset(&sa.sa_mask); 844 845 sigbus_action.sa_sigaction = sigbus_handler; 846 sigbus_action.sa_flags = SA_SIGINFO; 847 sigemptyset(&sigbus_action.sa_mask); 848 #else 849 exit_action.sa_handler = cleanup_handler; 850 exit_action.sa_flags = 0; 851 sigemptyset(&exit_action.sa_mask); 852 853 sa.sa_handler = die_handler; 854 sa.sa_flags = 0; 855 sigemptyset(&sa.sa_mask); 856 857 #ifndef CRAY 858 sigbus_action.sa_handler = sigbus_handler; 859 sigbus_action.sa_flags = 0; 860 sigemptyset(&sigbus_action.sa_mask); 861 #endif 862 #endif 863 864 for (i = 1; i <= NSIG; i++) { 865 switch (i) { 866 /* 
Signals to terminate program on */ 867 case SIGINT: 868 sigaction(i, &exit_action, NULL); 869 break; 870 871 #ifndef CRAY 872 /* This depends on active_mmap_rw */ 873 case SIGBUS: 874 sigaction(i, &sigbus_action, NULL); 875 break; 876 #endif 877 878 /* Signals to Ignore... */ 879 case SIGSTOP: 880 case SIGCONT: 881 #ifdef SIGRECOVERY 882 case SIGRECOVERY: 883 #endif 884 sigaction(i, &ignore_action, NULL); 885 break; 886 887 /* Signals to trap & report & die */ 888 /*case SIGTRAP: */ 889 /*case SIGABRT: */ 890 #ifdef SIGERR /* cray only signals */ 891 case SIGERR: 892 case SIGBUFIO: 893 case SIGINFO: 894 #endif 895 /*case SIGFPE: */ 896 case SIGURG: 897 case SIGHUP: 898 case SIGTERM: 899 case SIGPIPE: 900 case SIGIO: 901 case SIGUSR1: 902 case SIGUSR2: 903 sigaction(i, &sa, NULL); 904 break; 905 906 /* Default Action for all other signals */ 907 default: 908 sigaction(i, &def_action, NULL); 909 break; 910 } 911 } 912 913 /* 914 * Main loop - each doio proc does this until the read returns eof (0). 915 * Call the appropriate io function based on the request type. 916 */ 917 918 while ((nbytes = read(infd, (char *)&ioreq, sizeof(ioreq)))) { 919 920 /* 921 * Periodically check our ppid. If it is 1, the child exits to 922 * help clean up in the case that the main doio process was 923 * killed. 
924 */ 925 926 if (Reqno && ((Reqno % PPID_CHECK_INTERVAL) == 0)) { 927 if (getppid() == 1) { 928 doio_fprintf(stderr, 929 "Parent doio process has exited\n"); 930 alloc_mem(-1); 931 exit(E_SETUP); 932 } 933 } 934 935 if (nbytes == -1) { 936 doio_fprintf(stderr, 937 "read of %d bytes from input failed: %s (%d)\n", 938 sizeof(ioreq), SYSERR, errno); 939 alloc_mem(-1); 940 exit(E_SETUP); 941 } 942 943 if (nbytes != sizeof(ioreq)) { 944 doio_fprintf(stderr, 945 "read wrong # bytes from input stream, expected %d, got %d\n", 946 sizeof(ioreq), nbytes); 947 alloc_mem(-1); 948 exit(E_SETUP); 949 } 950 951 if (ioreq.r_magic != DOIO_MAGIC) { 952 doio_fprintf(stderr, 953 "got a bad magic # from input stream. Expected 0%o, got 0%o\n", 954 DOIO_MAGIC, ioreq.r_magic); 955 alloc_mem(-1); 956 exit(E_SETUP); 957 } 958 959 /* 960 * If we're on a Release_Interval multiple, relase all ssd and 961 * core space, and close all fd's in Fd_Map[]. 962 */ 963 964 if (Reqno && Release_Interval && !(Reqno % Release_Interval)) { 965 if (Memsize) { 966 #ifdef NOTDEF 967 sbrk(-1 * Memsize); 968 #else 969 alloc_mem(-1); 970 #endif 971 } 972 #ifdef _CRAY1 973 if (Sdssize) { 974 ssbreak(-1 * btoc(Sdssize)); 975 Sdsptr = 0; 976 Sdssize = 0; 977 } 978 #endif /* _CRAY1 */ 979 980 alloc_fd(NULL, 0); 981 } 982 983 switch (ioreq.r_type) { 984 case READ: 985 case READA: 986 rval = do_read(&ioreq); 987 break; 988 989 case WRITE: 990 case WRITEA: 991 rval = do_write(&ioreq); 992 break; 993 994 case READV: 995 case AREAD: 996 case PREAD: 997 case LREAD: 998 case LREADA: 999 case LSREAD: 1000 case LSREADA: 1001 case WRITEV: 1002 case AWRITE: 1003 case PWRITE: 1004 case MMAPR: 1005 case MMAPW: 1006 case LWRITE: 1007 case LWRITEA: 1008 case LSWRITE: 1009 case LSWRITEA: 1010 case LEREAD: 1011 case LEREADA: 1012 case LEWRITE: 1013 case LEWRITEA: 1014 rval = do_rw(&ioreq); 1015 break; 1016 1017 #ifdef CRAY 1018 case SSREAD: 1019 case SSWRITE: 1020 rval = do_ssdio(&ioreq); 1021 break; 1022 1023 case LISTIO: 1024 
rval = do_listio(&ioreq); 1025 break; 1026 #endif 1027 1028 #ifdef sgi 1029 case RESVSP: 1030 case UNRESVSP: 1031 #ifdef F_FSYNC 1032 case DFFSYNC: 1033 #endif 1034 rval = do_fcntl(&ioreq); 1035 break; 1036 #endif /* sgi */ 1037 1038 #ifndef CRAY 1039 case FSYNC2: 1040 case FDATASYNC: 1041 rval = do_sync(&ioreq); 1042 break; 1043 #endif 1044 default: 1045 doio_fprintf(stderr, 1046 "Don't know how to handle io request type %d\n", 1047 ioreq.r_type); 1048 alloc_mem(-1); 1049 exit(E_SETUP); 1050 } 1051 1052 if (rval == SKIP_REQ) { 1053 Reqskipcnt++; 1054 } else if (rval != 0) { 1055 alloc_mem(-1); 1056 doio_fprintf(stderr, 1057 "doio(): operation %d returned != 0\n", 1058 ioreq.r_type); 1059 exit(E_SETUP); 1060 } 1061 1062 if (Message_Interval && Reqno % Message_Interval == 0) { 1063 doio_fprintf(stderr, 1064 "Info: %d requests done (%d skipped) by this process\n", 1065 Reqno, Reqskipcnt); 1066 } 1067 1068 Reqno++; 1069 1070 if (delayop != 0) 1071 doio_delay(); 1072 } 1073 1074 /* 1075 * Child exits normally 1076 */ 1077 alloc_mem(-1); 1078 exit(E_NORMAL); 1079 1080 } /* doio */ 1081 1082 void doio_delay(void) 1083 { 1084 struct timeval tv_delay; 1085 struct sigaction sa_al, sa_old; 1086 sigset_t al_mask; 1087 1088 switch (delayop) { 1089 case DELAY_SELECT: 1090 tv_delay.tv_sec = delaytime / 1000000; 1091 tv_delay.tv_usec = delaytime % 1000000; 1092 /*doio_fprintf(stdout, "delay_select: %d %d\n", 1093 tv_delay.tv_sec, tv_delay.tv_usec); */ 1094 select(0, NULL, NULL, NULL, &tv_delay); 1095 break; 1096 1097 case DELAY_SLEEP: 1098 sleep(delaytime); 1099 break; 1100 1101 #ifdef sgi 1102 case DELAY_SGINAP: 1103 sginap(delaytime); 1104 break; 1105 #endif 1106 1107 case DELAY_ALARM: 1108 sa_al.sa_flags = 0; 1109 sa_al.sa_handler = noop_handler; 1110 sigemptyset(&sa_al.sa_mask); 1111 sigaction(SIGALRM, &sa_al, &sa_old); 1112 sigemptyset(&al_mask); 1113 alarm(delaytime); 1114 sigsuspend(&al_mask); 1115 sigaction(SIGALRM, &sa_old, 0); 1116 break; 1117 } 1118 } 1119 1120 /* 1121 
* Format IO requests, returning a pointer to the formatted text. 1122 * 1123 * format_strat - formats the async i/o completion strategy 1124 * format_rw - formats a read[a]/write[a] request 1125 * format_sds - formats a ssread/sswrite request 1126 * format_listio- formats a listio request 1127 * 1128 * ioreq is the doio io request structure. 1129 */ 1130 1131 struct smap sysnames[] = { 1132 {"READ", READ}, 1133 {"WRITE", WRITE}, 1134 {"READA", READA}, 1135 {"WRITEA", WRITEA}, 1136 {"SSREAD", SSREAD}, 1137 {"SSWRITE", SSWRITE}, 1138 {"LISTIO", LISTIO}, 1139 {"LREAD", LREAD}, 1140 {"LREADA", LREADA}, 1141 {"LWRITE", LWRITE}, 1142 {"LWRITEA", LWRITEA}, 1143 {"LSREAD", LSREAD}, 1144 {"LSREADA", LSREADA}, 1145 {"LSWRITE", LSWRITE}, 1146 {"LSWRITEA", LSWRITEA}, 1147 1148 /* Irix System Calls */ 1149 {"PREAD", PREAD}, 1150 {"PWRITE", PWRITE}, 1151 {"AREAD", AREAD}, 1152 {"AWRITE", AWRITE}, 1153 {"LLREAD", LLREAD}, 1154 {"LLAREAD", LLAREAD}, 1155 {"LLWRITE", LLWRITE}, 1156 {"LLAWRITE", LLAWRITE}, 1157 {"RESVSP", RESVSP}, 1158 {"UNRESVSP", UNRESVSP}, 1159 {"DFFSYNC", DFFSYNC}, 1160 1161 /* Irix and Linux System Calls */ 1162 {"READV", READV}, 1163 {"WRITEV", WRITEV}, 1164 {"MMAPR", MMAPR}, 1165 {"MMAPW", MMAPW}, 1166 {"FSYNC2", FSYNC2}, 1167 {"FDATASYNC", FDATASYNC}, 1168 1169 {"unknown", -1}, 1170 }; 1171 1172 struct smap aionames[] = { 1173 {"poll", A_POLL}, 1174 {"signal", A_SIGNAL}, 1175 {"recall", A_RECALL}, 1176 {"recalla", A_RECALLA}, 1177 {"recalls", A_RECALLS}, 1178 {"suspend", A_SUSPEND}, 1179 {"callback", A_CALLBACK}, 1180 {"synch", 0}, 1181 {"unknown", -1}, 1182 }; 1183 1184 char *format_oflags(int oflags) 1185 { 1186 char flags[255]; 1187 1188 flags[0] = '\0'; 1189 switch (oflags & 03) { 1190 case O_RDONLY: 1191 strcat(flags, "O_RDONLY,"); 1192 break; 1193 case O_WRONLY: 1194 strcat(flags, "O_WRONLY,"); 1195 break; 1196 case O_RDWR: 1197 strcat(flags, "O_RDWR,"); 1198 break; 1199 default: 1200 strcat(flags, "O_weird"); 1201 break; 1202 } 1203 1204 if (oflags & 
O_EXCL) 1205 strcat(flags, "O_EXCL,"); 1206 1207 if (oflags & O_SYNC) 1208 strcat(flags, "O_SYNC,"); 1209 #ifdef CRAY 1210 if (oflags & O_RAW) 1211 strcat(flags, "O_RAW,"); 1212 if (oflags & O_WELLFORMED) 1213 strcat(flags, "O_WELLFORMED,"); 1214 #ifdef O_SSD 1215 if (oflags & O_SSD) 1216 strcat(flags, "O_SSD,"); 1217 #endif 1218 if (oflags & O_LDRAW) 1219 strcat(flags, "O_LDRAW,"); 1220 if (oflags & O_PARALLEL) 1221 strcat(flags, "O_PARALLEL,"); 1222 if (oflags & O_BIG) 1223 strcat(flags, "O_BIG,"); 1224 if (oflags & O_PLACE) 1225 strcat(flags, "O_PLACE,"); 1226 if (oflags & O_ASYNC) 1227 strcat(flags, "O_ASYNC,"); 1228 #endif 1229 1230 #ifdef sgi 1231 if (oflags & O_DIRECT) 1232 strcat(flags, "O_DIRECT,"); 1233 if (oflags & O_DSYNC) 1234 strcat(flags, "O_DSYNC,"); 1235 if (oflags & O_RSYNC) 1236 strcat(flags, "O_RSYNC,"); 1237 #endif 1238 1239 return (strdup(flags)); 1240 } 1241 1242 char *format_strat(int strategy) 1243 { 1244 char msg[64]; 1245 char *aio_strat; 1246 1247 switch (strategy) { 1248 case A_POLL: 1249 aio_strat = "POLL"; 1250 break; 1251 case A_SIGNAL: 1252 aio_strat = "SIGNAL"; 1253 break; 1254 case A_RECALL: 1255 aio_strat = "RECALL"; 1256 break; 1257 case A_RECALLA: 1258 aio_strat = "RECALLA"; 1259 break; 1260 case A_RECALLS: 1261 aio_strat = "RECALLS"; 1262 break; 1263 case A_SUSPEND: 1264 aio_strat = "SUSPEND"; 1265 break; 1266 case A_CALLBACK: 1267 aio_strat = "CALLBACK"; 1268 break; 1269 case 0: 1270 aio_strat = "<zero>"; 1271 break; 1272 default: 1273 sprintf(msg, "<error:%#o>", strategy); 1274 aio_strat = strdup(msg); 1275 break; 1276 } 1277 1278 return (aio_strat); 1279 } 1280 1281 char *format_rw(struct io_req *ioreq, int fd, void *buffer, int signo, 1282 char *pattern, void *iosw) 1283 { 1284 static char *errbuf = NULL; 1285 char *aio_strat, *cp; 1286 struct read_req *readp = &ioreq->r_data.read; 1287 struct write_req *writep = &ioreq->r_data.write; 1288 struct read_req *readap = &ioreq->r_data.read; 1289 struct write_req *writeap = 
&ioreq->r_data.write; 1290 1291 if (errbuf == NULL) 1292 errbuf = malloc(32768); 1293 1294 cp = errbuf; 1295 cp += sprintf(cp, "Request number %d\n", Reqno); 1296 1297 switch (ioreq->r_type) { 1298 case READ: 1299 cp += sprintf(cp, "syscall: read(%d, %#lo, %d)\n", 1300 fd, (unsigned long)buffer, readp->r_nbytes); 1301 cp += 1302 sprintf(cp, 1303 " fd %d is file %s - open flags are %#o\n", 1304 fd, readp->r_file, readp->r_oflags); 1305 cp += 1306 sprintf(cp, " read done at file offset %d\n", 1307 readp->r_offset); 1308 break; 1309 1310 case WRITE: 1311 cp += sprintf(cp, "syscall: write(%d, %#lo, %d)\n", 1312 fd, (unsigned long)buffer, writep->r_nbytes); 1313 cp += 1314 sprintf(cp, 1315 " fd %d is file %s - open flags are %#o\n", 1316 fd, writep->r_file, writep->r_oflags); 1317 cp += 1318 sprintf(cp, 1319 " write done at file offset %d - pattern is %s\n", 1320 writep->r_offset, pattern); 1321 break; 1322 1323 case READA: 1324 aio_strat = format_strat(readap->r_aio_strat); 1325 1326 cp += sprintf(cp, "syscall: reada(%d, %#lo, %d, %#lo, %d)\n", 1327 fd, (unsigned long)buffer, readap->r_nbytes, 1328 (unsigned long)iosw, signo); 1329 cp += 1330 sprintf(cp, 1331 " fd %d is file %s - open flags are %#o\n", 1332 fd, readap->r_file, readp->r_oflags); 1333 cp += 1334 sprintf(cp, " reada done at file offset %d\n", 1335 readap->r_offset); 1336 cp += 1337 sprintf(cp, 1338 " async io completion strategy is %s\n", 1339 aio_strat); 1340 break; 1341 1342 case WRITEA: 1343 aio_strat = format_strat(writeap->r_aio_strat); 1344 1345 cp += sprintf(cp, "syscall: writea(%d, %#lo, %d, %#lo, %d)\n", 1346 fd, (unsigned long)buffer, writeap->r_nbytes, 1347 (unsigned long)iosw, signo); 1348 cp += 1349 sprintf(cp, 1350 " fd %d is file %s - open flags are %#o\n", 1351 fd, writeap->r_file, writeap->r_oflags); 1352 cp += 1353 sprintf(cp, 1354 " writea done at file offset %d - pattern is %s\n", 1355 writeap->r_offset, pattern); 1356 cp += 1357 sprintf(cp, 1358 " async io completion strategy is 
%s\n", 1359 aio_strat); 1360 break; 1361 1362 } 1363 1364 return errbuf; 1365 } 1366 1367 #ifdef CRAY 1368 char *format_sds(struct io_req *ioreq, void *buffer, int sds, char *pattern) 1369 { 1370 int i; 1371 static char *errbuf = NULL; 1372 char *cp; 1373 1374 struct ssread_req *ssreadp = &ioreq->r_data.ssread; 1375 struct sswrite_req *sswritep = &ioreq->r_data.sswrite; 1376 1377 if (errbuf == NULL) 1378 errbuf = malloc(32768); 1379 1380 cp = errbuf; 1381 cp += sprintf(cp, "Request number %d\n", Reqno); 1382 1383 switch (ioreq->r_type) { 1384 case SSREAD: 1385 cp += sprintf(cp, "syscall: ssread(%#o, %#o, %d)\n", 1386 buffer, sds, ssreadp->r_nbytes); 1387 break; 1388 1389 case SSWRITE: 1390 cp += 1391 sprintf(cp, 1392 "syscall: sswrite(%#o, %#o, %d) - pattern was %s\n", 1393 buffer, sds, sswritep->r_nbytes, pattern); 1394 break; 1395 } 1396 return errbuf; 1397 } 1398 #endif /* CRAY */ 1399 1400 /* 1401 * Perform the various sorts of disk reads 1402 */ 1403 1404 int do_read(struct io_req *req) 1405 { 1406 int fd, offset, nbytes, oflags, rval; 1407 char *addr, *file; 1408 #ifdef CRAY 1409 struct aio_info *aiop; 1410 int aio_id, aio_strat, signo; 1411 #endif 1412 #ifdef sgi 1413 struct fd_cache *fdc; 1414 #endif 1415 1416 /* 1417 * Initialize common fields - assumes r_oflags, r_file, r_offset, and 1418 * r_nbytes are at the same offset in the read_req and reada_req 1419 * structures. 1420 */ 1421 1422 file = req->r_data.read.r_file; 1423 oflags = req->r_data.read.r_oflags; 1424 offset = req->r_data.read.r_offset; 1425 nbytes = req->r_data.read.r_nbytes; 1426 1427 /*printf("read: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */ 1428 1429 /* 1430 * Grab an open file descriptor 1431 * Note: must be done before memory allocation so that the direct i/o 1432 * information is available in mem. 
allocate 1433 */ 1434 1435 if ((fd = alloc_fd(file, oflags)) == -1) 1436 return -1; 1437 1438 /* 1439 * Allocate core or sds - based on the O_SSD flag 1440 */ 1441 1442 #ifndef wtob 1443 #define wtob(x) (x * sizeof(UINT64_T)) 1444 #endif 1445 1446 #ifdef CRAY 1447 if (oflags & O_SSD) { 1448 if (alloc_sds(nbytes) == -1) 1449 return -1; 1450 1451 addr = (char *)Sdsptr; 1452 } else { 1453 if ((rval = 1454 alloc_mem(nbytes + wtob(1) * 2 + 1455 MPP_BUMP * sizeof(UINT64_T))) < 0) { 1456 return rval; 1457 } 1458 1459 addr = Memptr; 1460 1461 /* 1462 * if io is not raw, bump the offset by a random amount 1463 * to generate non-word-aligned io. 1464 */ 1465 if (!(req->r_data.read.r_uflags & F_WORD_ALIGNED)) { 1466 addr += random_range(0, wtob(1) - 1, 1, NULL); 1467 } 1468 } 1469 #else 1470 #ifdef sgi 1471 /* get memory alignment for using DIRECT I/O */ 1472 fdc = alloc_fdcache(file, oflags); 1473 1474 if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) { 1475 return rval; 1476 } 1477 1478 addr = Memptr; 1479 1480 if ((req->r_data.read.r_uflags & F_WORD_ALIGNED)) { 1481 /* 1482 * Force memory alignment for Direct I/O 1483 */ 1484 if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) { 1485 addr += 1486 fdc->c_memalign - ((long)addr % fdc->c_memalign); 1487 } 1488 } else { 1489 addr += random_range(0, wtob(1) - 1, 1, NULL); 1490 } 1491 #else 1492 /* what is !CRAY && !sgi ? */ 1493 if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) { 1494 return rval; 1495 } 1496 1497 addr = Memptr; 1498 #endif /* !CRAY && sgi */ 1499 #endif /* CRAY */ 1500 1501 switch (req->r_type) { 1502 case READ: 1503 /* move to the desired file position. 
*/ 1504 if (lseek(fd, offset, SEEK_SET) == -1) { 1505 doio_fprintf(stderr, 1506 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n", 1507 fd, offset, SYSERR, errno); 1508 return -1; 1509 } 1510 1511 if ((rval = read(fd, addr, nbytes)) == -1) { 1512 doio_fprintf(stderr, 1513 "read() request failed: %s (%d)\n%s\n", 1514 SYSERR, errno, 1515 format_rw(req, fd, addr, -1, NULL, NULL)); 1516 doio_upanic(U_RVAL); 1517 return -1; 1518 } else if (rval != nbytes) { 1519 doio_fprintf(stderr, 1520 "read() request returned wrong # of bytes - expected %d, got %d\n%s\n", 1521 nbytes, rval, 1522 format_rw(req, fd, addr, -1, NULL, NULL)); 1523 doio_upanic(U_RVAL); 1524 return -1; 1525 } 1526 break; 1527 1528 #ifdef CRAY 1529 case READA: 1530 /* 1531 * Async read 1532 */ 1533 1534 /* move to the desired file position. */ 1535 if (lseek(fd, offset, SEEK_SET) == -1) { 1536 doio_fprintf(stderr, 1537 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n", 1538 fd, offset, SYSERR, errno); 1539 return -1; 1540 } 1541 1542 aio_strat = req->r_data.read.r_aio_strat; 1543 signo = (aio_strat == A_SIGNAL) ? 
SIGUSR1 : 0; 1544 1545 aio_id = aio_register(fd, aio_strat, signo); 1546 aiop = aio_slot(aio_id); 1547 1548 if (reada(fd, addr, nbytes, &aiop->iosw, signo) == -1) { 1549 doio_fprintf(stderr, "reada() failed: %s (%d)\n%s\n", 1550 SYSERR, errno, 1551 format_rw(req, fd, addr, signo, NULL, 1552 &aiop->iosw)); 1553 aio_unregister(aio_id); 1554 doio_upanic(U_RVAL); 1555 rval = -1; 1556 } else { 1557 /* 1558 * Wait for io to complete 1559 */ 1560 1561 aio_wait(aio_id); 1562 1563 /* 1564 * make sure the io completed without error 1565 */ 1566 1567 if (aiop->iosw.sw_count != nbytes) { 1568 doio_fprintf(stderr, 1569 "Bad iosw from reada()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n", 1570 1, 0, nbytes, 1571 aiop->iosw.sw_flag, 1572 aiop->iosw.sw_error, 1573 aiop->iosw.sw_count, 1574 format_rw(req, fd, addr, signo, 1575 NULL, &aiop->iosw)); 1576 aio_unregister(aio_id); 1577 doio_upanic(U_IOSW); 1578 rval = -1; 1579 } else { 1580 aio_unregister(aio_id); 1581 rval = 0; 1582 } 1583 } 1584 1585 if (rval == -1) 1586 return rval; 1587 break; 1588 #endif /* CRAY */ 1589 } 1590 1591 return 0; /* if we get here, everything went ok */ 1592 } 1593 1594 /* 1595 * Perform the verious types of disk writes. 
1596 */ 1597 1598 int do_write(struct io_req *req) 1599 { 1600 static int pid = -1; 1601 int fd, nbytes, oflags, signo; 1602 int logged_write, rval, got_lock; 1603 off_t offset, woffset; 1604 char *addr, pattern, *file, *msg; 1605 struct wlog_rec wrec; 1606 #ifdef CRAY 1607 int aio_strat, aio_id; 1608 struct aio_info *aiop; 1609 #endif 1610 #ifdef sgi 1611 struct fd_cache *fdc; 1612 #endif 1613 1614 woffset = 0; 1615 1616 /* 1617 * Misc variable setup 1618 */ 1619 1620 signo = 0; 1621 nbytes = req->r_data.write.r_nbytes; 1622 offset = req->r_data.write.r_offset; 1623 pattern = req->r_data.write.r_pattern; 1624 file = req->r_data.write.r_file; 1625 oflags = req->r_data.write.r_oflags; 1626 1627 /*printf("pwrite: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */ 1628 1629 /* 1630 * Allocate core memory and possibly sds space. Initialize the data 1631 * to be written. 1632 */ 1633 1634 Pattern[0] = pattern; 1635 1636 /* 1637 * Get a descriptor to do the io on 1638 */ 1639 1640 if ((fd = alloc_fd(file, oflags)) == -1) 1641 return -1; 1642 1643 /*printf("write: %d, %s, %#o, %d %d\n", 1644 fd, file, oflags, offset, nbytes); */ 1645 1646 /* 1647 * Allocate SDS space for backdoor write if desired 1648 */ 1649 1650 #ifdef CRAY 1651 if (oflags & O_SSD) { 1652 #ifndef _CRAYMPP 1653 if ((rval = alloc_mem(nbytes + wtob(1))) < 0) { 1654 return rval; 1655 } 1656 1657 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0); 1658 /*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */ 1659 1660 if (alloc_sds(nbytes) == -1) 1661 return -1; 1662 1663 if (sswrite((long)Memptr, Sdsptr, btoc(nbytes)) == -1) { 1664 doio_fprintf(stderr, 1665 "sswrite(%d, %d, %d) failed: %s (%d)\n", 1666 (long)Memptr, Sdsptr, btoc(nbytes), SYSERR, 1667 errno); 1668 fflush(stderr); 1669 return -1; 1670 } 1671 1672 addr = (char *)Sdsptr; 1673 #else 1674 doio_fprintf(stderr, 1675 "Invalid O_SSD flag was generated for MPP system\n"); 1676 fflush(stderr); 1677 return -1; 1678 #endif /* !CRAYMPP */ 
1679 } else { 1680 if ((rval = alloc_mem(nbytes + wtob(1)) < 0)) { 1681 return rval; 1682 } 1683 1684 addr = Memptr; 1685 1686 /* 1687 * if io is not raw, bump the offset by a random amount 1688 * to generate non-word-aligned io. 1689 */ 1690 1691 if (!(req->r_data.write.r_uflags & F_WORD_ALIGNED)) { 1692 addr += random_range(0, wtob(1) - 1, 1, NULL); 1693 } 1694 1695 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0); 1696 if (addr != Memptr) 1697 memmove(addr, Memptr, nbytes); 1698 } 1699 #else /* CRAY */ 1700 #ifdef sgi 1701 /* get memory alignment for using DIRECT I/O */ 1702 fdc = alloc_fdcache(file, oflags); 1703 1704 if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) { 1705 return rval; 1706 } 1707 1708 addr = Memptr; 1709 1710 if ((req->r_data.write.r_uflags & F_WORD_ALIGNED)) { 1711 /* 1712 * Force memory alignment for Direct I/O 1713 */ 1714 if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) { 1715 addr += 1716 fdc->c_memalign - ((long)addr % fdc->c_memalign); 1717 } 1718 } else { 1719 addr += random_range(0, wtob(1) - 1, 1, NULL); 1720 } 1721 1722 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0); 1723 if (addr != Memptr) 1724 memmove(addr, Memptr, nbytes); 1725 1726 #else /* sgi */ 1727 if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) { 1728 return rval; 1729 } 1730 1731 addr = Memptr; 1732 1733 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0); 1734 if (addr != Memptr) 1735 memmove(addr, Memptr, nbytes); 1736 #endif /* sgi */ 1737 #endif /* CRAY */ 1738 1739 rval = -1; 1740 got_lock = 0; 1741 logged_write = 0; 1742 1743 if (k_opt) { 1744 if (lock_file_region(file, fd, F_WRLCK, offset, nbytes) < 0) { 1745 alloc_mem(-1); 1746 exit(E_INTERNAL); 1747 } 1748 1749 got_lock = 1; 1750 } 1751 1752 /* 1753 * Write a preliminary write-log entry. This is done so that 1754 * doio_check can do corruption detection across an interrupt/crash. 1755 * Note that w_done is set to 0. 
If doio_check sees this, it 1756 * re-creates the file extents as if the write completed, but does not 1757 * do any checking - see comments in doio_check for more details. 1758 */ 1759 1760 if (w_opt) { 1761 if (pid == -1) { 1762 pid = getpid(); 1763 } 1764 wrec.w_async = (req->r_type == WRITEA) ? 1 : 0; 1765 wrec.w_oflags = oflags; 1766 wrec.w_pid = pid; 1767 wrec.w_offset = offset; 1768 wrec.w_nbytes = nbytes; 1769 1770 wrec.w_pathlen = strlen(file); 1771 memcpy(wrec.w_path, file, wrec.w_pathlen); 1772 wrec.w_hostlen = strlen(Host); 1773 memcpy(wrec.w_host, Host, wrec.w_hostlen); 1774 wrec.w_patternlen = Pattern_Length; 1775 memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen); 1776 1777 wrec.w_done = 0; 1778 1779 if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) { 1780 doio_fprintf(stderr, 1781 "Could not append to write-log: %s (%d)\n", 1782 SYSERR, errno); 1783 } else { 1784 logged_write = 1; 1785 } 1786 } 1787 1788 switch (req->r_type) { 1789 case WRITE: 1790 /* 1791 * sync write 1792 */ 1793 1794 if (lseek(fd, offset, SEEK_SET) == -1) { 1795 doio_fprintf(stderr, 1796 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n", 1797 fd, offset, SYSERR, errno); 1798 return -1; 1799 } 1800 1801 rval = write(fd, addr, nbytes); 1802 1803 if (rval == -1) { 1804 doio_fprintf(stderr, 1805 "write() failed: %s (%d)\n%s\n", 1806 SYSERR, errno, 1807 format_rw(req, fd, addr, -1, Pattern, 1808 NULL)); 1809 #ifdef sgi 1810 doio_fprintf(stderr, 1811 "write() failed: %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%miniou(%d)=%d, oflags=%#o memalign=%d, addr%%memalign=%d\n", 1812 strerror(errno), 1813 fd, addr, nbytes, 1814 offset, 1815 fdc->c_miniosz, nbytes % fdc->c_miniosz, 1816 oflags, fdc->c_memalign, 1817 (long)addr % fdc->c_memalign); 1818 #else 1819 doio_fprintf(stderr, 1820 "write() failed: %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%1B=%d, oflags=%#o\n", 1821 strerror(errno), 1822 fd, addr, nbytes, 1823 offset, nbytes % 4096, oflags); 1824 #endif 1825 doio_upanic(U_RVAL); 
1826 } else if (rval != nbytes) { 1827 doio_fprintf(stderr, 1828 "write() returned wrong # bytes - expected %d, got %d\n%s\n", 1829 nbytes, rval, 1830 format_rw(req, fd, addr, -1, Pattern, 1831 NULL)); 1832 doio_upanic(U_RVAL); 1833 rval = -1; 1834 } 1835 1836 break; 1837 1838 #ifdef CRAY 1839 case WRITEA: 1840 /* 1841 * async write 1842 */ 1843 if (lseek(fd, offset, SEEK_SET) == -1) { 1844 doio_fprintf(stderr, 1845 "lseek(%d, %d, SEEK_SET) failed: %s (%d)\n", 1846 fd, offset, SYSERR, errno); 1847 return -1; 1848 } 1849 1850 aio_strat = req->r_data.write.r_aio_strat; 1851 signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0; 1852 1853 aio_id = aio_register(fd, aio_strat, signo); 1854 aiop = aio_slot(aio_id); 1855 1856 /* 1857 * init iosw and do the async write 1858 */ 1859 1860 if (writea(fd, addr, nbytes, &aiop->iosw, signo) == -1) { 1861 doio_fprintf(stderr, 1862 "writea() failed: %s (%d)\n%s\n", 1863 SYSERR, errno, 1864 format_rw(req, fd, addr, -1, Pattern, 1865 NULL)); 1866 doio_upanic(U_RVAL); 1867 aio_unregister(aio_id); 1868 rval = -1; 1869 } else { 1870 1871 /* 1872 * Wait for io to complete 1873 */ 1874 1875 aio_wait(aio_id); 1876 1877 /* 1878 * check that iosw is ok 1879 */ 1880 1881 if (aiop->iosw.sw_count != nbytes) { 1882 doio_fprintf(stderr, 1883 "Bad iosw from writea()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n", 1884 1, 0, nbytes, 1885 aiop->iosw.sw_flag, 1886 aiop->iosw.sw_error, 1887 aiop->iosw.sw_count, 1888 format_rw(req, fd, addr, -1, 1889 Pattern, &aiop->iosw)); 1890 aio_unregister(aio_id); 1891 doio_upanic(U_IOSW); 1892 rval = -1; 1893 } else { 1894 aio_unregister(aio_id); 1895 rval = 0; 1896 } 1897 } 1898 break; 1899 1900 #endif /* CRAY */ 1901 } 1902 1903 /* 1904 * Verify that the data was written correctly - check_file() returns 1905 * a non-null pointer which contains an error message if there are 1906 * problems. 
1907 */ 1908 1909 if (v_opt) { 1910 msg = check_file(file, offset, nbytes, Pattern, Pattern_Length, 1911 0, oflags & O_PARALLEL); 1912 if (msg != NULL) { 1913 doio_fprintf(stderr, "%s%s\n", msg, 1914 #ifdef CRAY 1915 format_rw(req, fd, addr, -1, Pattern, 1916 &aiop->iosw) 1917 #else 1918 format_rw(req, fd, addr, -1, Pattern, NULL) 1919 #endif 1920 ); 1921 doio_upanic(U_CORRUPTION); 1922 exit(E_COMPARE); 1923 1924 } 1925 } 1926 1927 /* 1928 * General cleanup ... 1929 * 1930 * Write extent information to the write-log, so that doio_check can do 1931 * corruption detection. Note that w_done is set to 1, indicating that 1932 * the write has been verified as complete. We don't need to write the 1933 * filename on the second logging. 1934 */ 1935 1936 if (w_opt && logged_write) { 1937 wrec.w_done = 1; 1938 wlog_record_write(&Wlog, &wrec, woffset); 1939 } 1940 1941 /* 1942 * Unlock file region if necessary 1943 */ 1944 1945 if (got_lock) { 1946 if (lock_file_region(file, fd, F_UNLCK, offset, nbytes) < 0) { 1947 alloc_mem(-1); 1948 exit(E_INTERNAL); 1949 } 1950 } 1951 1952 return ((rval == -1) ? -1 : 0); 1953 } 1954 1955 /* 1956 * Simple routine to lock/unlock a file using fcntl() 1957 */ 1958 1959 int lock_file_region(char *fname, int fd, int type, int start, int nbytes) 1960 { 1961 struct flock flk; 1962 1963 flk.l_type = type; 1964 flk.l_whence = 0; 1965 flk.l_start = start; 1966 flk.l_len = nbytes; 1967 1968 if (fcntl(fd, F_SETLKW, &flk) < 0) { 1969 doio_fprintf(stderr, 1970 "fcntl(%d, %d, %#o) failed for file %s, lock type %d, offset %d, length %d: %s (%d), open flags: %#o\n", 1971 fd, F_SETLKW, &flk, fname, type, 1972 start, nbytes, SYSERR, errno, 1973 fcntl(fd, F_GETFL, 0)); 1974 return -1; 1975 } 1976 1977 return 0; 1978 } 1979 1980 /* 1981 * Perform a listio request. 
1982 */ 1983 1984 #ifdef CRAY 1985 char *format_listio(struct io_req *ioreq, int lcmd, struct listreq *list, 1986 int nent, int fd, char *pattern) 1987 { 1988 static char *errbuf = NULL; 1989 struct listio_req *liop = &ioreq->r_data.listio; 1990 struct listreq *listreq; 1991 char *cp, *cmd, *opcode, *aio_strat; 1992 int i; 1993 1994 switch (lcmd) { 1995 case LC_START: 1996 cmd = "LC_START"; 1997 break; 1998 case LC_WAIT: 1999 cmd = "LC_WAIT"; 2000 break; 2001 default: 2002 cmd = "???"; 2003 break; 2004 } 2005 2006 if (errbuf == NULL) 2007 errbuf = malloc(32768); 2008 2009 cp = errbuf; 2010 cp += sprintf(cp, "Request number %d\n", Reqno); 2011 2012 cp += sprintf(cp, "syscall: listio(%s, %#o, %d)\n\n", cmd, list, nent); 2013 2014 aio_strat = format_strat(liop->r_aio_strat); 2015 2016 for (i = 0; i < nent; i++) { 2017 cp += sprintf(cp, "struct lioreq for request element %d\n", i); 2018 cp += sprintf(cp, "----------------------------------------\n"); 2019 2020 listreq = list + i; 2021 2022 switch (listreq->li_opcode) { 2023 case LO_READ: 2024 opcode = "LO_READ"; 2025 break; 2026 case LO_WRITE: 2027 opcode = "LO_WRITE"; 2028 break; 2029 default: 2030 opcode = "???"; 2031 break; 2032 } 2033 2034 cp += sprintf(cp, " li_opcode = %s\n", opcode); 2035 cp += 2036 sprintf(cp, " li_drvr = %#o\n", 2037 listreq->li_drvr); 2038 cp += 2039 sprintf(cp, " li_flags = %#o\n", 2040 listreq->li_flags); 2041 cp += 2042 sprintf(cp, " li_offset = %d\n", 2043 listreq->li_offset); 2044 cp += 2045 sprintf(cp, " li_fildes = %d\n", 2046 listreq->li_fildes); 2047 cp += 2048 sprintf(cp, " li_buf = %#o\n", 2049 listreq->li_buf); 2050 cp += 2051 sprintf(cp, " li_nbyte = %d\n", 2052 listreq->li_nbyte); 2053 cp += 2054 sprintf(cp, " li_status = %#o (%d, %d, %d)\n", 2055 listreq->li_status, listreq->li_status->sw_flag, 2056 listreq->li_status->sw_error, 2057 listreq->li_status->sw_count); 2058 cp += 2059 sprintf(cp, " li_signo = %d\n", 2060 listreq->li_signo); 2061 cp += 2062 sprintf(cp, " li_nstride = 
%d\n", 2063 listreq->li_nstride); 2064 cp += 2065 sprintf(cp, " li_filstride = %d\n", 2066 listreq->li_filstride); 2067 cp += 2068 sprintf(cp, " li_memstride = %d\n", 2069 listreq->li_memstride); 2070 cp += 2071 sprintf(cp, " io completion strategy is %s\n", 2072 aio_strat); 2073 } 2074 return errbuf; 2075 } 2076 #endif /* CRAY */ 2077 2078 int do_listio(struct io_req *req) 2079 { 2080 #ifdef CRAY 2081 struct listio_req *lio; 2082 int fd, oflags, signo, nb, i; 2083 int logged_write, rval, got_lock; 2084 int aio_strat, aio_id; 2085 int min_byte, max_byte; 2086 int mem_needed; 2087 int foffset, fstride, mstride, nstrides; 2088 char *moffset; 2089 long offset, woffset; 2090 char *addr, *msg; 2091 sigset_t block_mask, omask; 2092 struct wlog_rec wrec; 2093 struct aio_info *aiop; 2094 struct listreq lio_req; 2095 2096 lio = &req->r_data.listio; 2097 2098 /* 2099 * If bytes per stride is less than the stride size, drop the request 2100 * since it will cause overlapping strides, and we cannot predict 2101 * the order they will complete in. 2102 */ 2103 2104 if (lio->r_filestride && abs(lio->r_filestride) < lio->r_nbytes) { 2105 doio_fprintf(stderr, 2106 "do_listio(): Bogus listio request - abs(filestride) [%d] < nbytes [%d]\n", 2107 abs(lio->r_filestride), lio->r_nbytes); 2108 return -1; 2109 } 2110 2111 /* 2112 * Allocate core memory. Initialize the data to be written. Make 2113 * sure we get enough, based on the memstride. 2114 */ 2115 2116 mem_needed = 2117 stride_bounds(0, lio->r_memstride, lio->r_nstrides, 2118 lio->r_nbytes, NULL, NULL); 2119 2120 if ((rval = alloc_mem(mem_needed + wtob(1))) < 0) { 2121 return rval; 2122 } 2123 2124 /* 2125 * Set the memory address pointer. If the io is not raw, adjust 2126 * addr by a random amount, so that non-raw io is not necessarily 2127 * word aligned. 
2128 */ 2129 2130 addr = Memptr; 2131 2132 if (!(lio->r_uflags & F_WORD_ALIGNED)) { 2133 addr += random_range(0, wtob(1) - 1, 1, NULL); 2134 } 2135 2136 if (lio->r_opcode == LO_WRITE) { 2137 Pattern[0] = lio->r_pattern; 2138 (*Data_Fill) (Memptr, mem_needed, Pattern, Pattern_Length, 0); 2139 if (addr != Memptr) 2140 memmove(addr, Memptr, mem_needed); 2141 } 2142 2143 /* 2144 * Get a descriptor to do the io on. No need to do an lseek, as this 2145 * is encoded in the listio request. 2146 */ 2147 2148 if ((fd = alloc_fd(lio->r_file, lio->r_oflags)) == -1) { 2149 return -1; 2150 } 2151 2152 rval = -1; 2153 got_lock = 0; 2154 logged_write = 0; 2155 2156 /* 2157 * If the opcode is LO_WRITE, lock all regions of the file that 2158 * are touched by this listio request. Currently, we use 2159 * stride_bounds() to figure out the min and max bytes affected, and 2160 * lock the entire region, regardless of the file stride. 2161 */ 2162 2163 if (lio->r_opcode == LO_WRITE && k_opt) { 2164 stride_bounds(lio->r_offset, 2165 lio->r_filestride, lio->r_nstrides, 2166 lio->r_nbytes, &min_byte, &max_byte); 2167 2168 if (lock_file_region(lio->r_file, fd, F_WRLCK, 2169 min_byte, (max_byte - min_byte + 1)) < 0) { 2170 doio_fprintf(stderr, 2171 "stride_bounds(%d, %d, %d, %d, ..., ...) set min_byte to %d, max_byte to %d\n", 2172 lio->r_offset, lio->r_filestride, 2173 lio->r_nstrides, lio->r_nbytes, min_byte, 2174 max_byte); 2175 return -1; 2176 } else { 2177 got_lock = 1; 2178 } 2179 } 2180 2181 /* 2182 * async write 2183 */ 2184 2185 aio_strat = lio->r_aio_strat; 2186 signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0; 2187 2188 aio_id = aio_register(fd, aio_strat, signo); 2189 aiop = aio_slot(aio_id); 2190 2191 /* 2192 * Form the listio request, and make the call. 
2193 */ 2194 2195 lio_req.li_opcode = lio->r_opcode; 2196 lio_req.li_drvr = 0; 2197 lio_req.li_flags = LF_LSEEK; 2198 lio_req.li_offset = lio->r_offset; 2199 lio_req.li_fildes = fd; 2200 2201 if (lio->r_memstride >= 0 || lio->r_nstrides <= 1) { 2202 lio_req.li_buf = addr; 2203 } else { 2204 lio_req.li_buf = addr + mem_needed - lio->r_nbytes; 2205 } 2206 2207 lio_req.li_nbyte = lio->r_nbytes; 2208 lio_req.li_status = &aiop->iosw; 2209 lio_req.li_signo = signo; 2210 lio_req.li_nstride = lio->r_nstrides; 2211 lio_req.li_filstride = lio->r_filestride; 2212 lio_req.li_memstride = lio->r_memstride; 2213 2214 /* 2215 * If signo != 0, block signo while we're in the system call, so that 2216 * we don't get interrupted syscall failures. 2217 */ 2218 2219 if (signo) { 2220 sigemptyset(&block_mask); 2221 sigaddset(&block_mask, signo); 2222 sigprocmask(SIG_BLOCK, &block_mask, &omask); 2223 } 2224 2225 if (listio(lio->r_cmd, &lio_req, 1) < 0) { 2226 doio_fprintf(stderr, 2227 "listio() failed: %s (%d)\n%s\n", 2228 SYSERR, errno, 2229 format_listio(req, lio->r_cmd, &lio_req, 1, fd, 2230 Pattern)); 2231 aio_unregister(aio_id); 2232 doio_upanic(U_RVAL); 2233 goto lio_done; 2234 } 2235 2236 if (signo) { 2237 sigprocmask(SIG_SETMASK, &omask, NULL); 2238 } 2239 2240 /* 2241 * Wait for io to complete 2242 */ 2243 2244 aio_wait(aio_id); 2245 2246 nstrides = lio->r_nstrides ? 
lio->r_nstrides : 1; 2247 if (aiop->iosw.sw_count != lio->r_nbytes * nstrides) { 2248 doio_fprintf(stderr, 2249 "Bad iosw from listio()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n", 2250 1, 0, lio->r_nbytes * lio->r_nstrides, 2251 aiop->iosw.sw_flag, 2252 aiop->iosw.sw_error, aiop->iosw.sw_count, 2253 format_listio(req, lio->r_cmd, &lio_req, 1, fd, 2254 Pattern)); 2255 aio_unregister(aio_id); 2256 doio_upanic(U_IOSW); 2257 goto lio_done; 2258 } 2259 2260 aio_unregister(aio_id); 2261 2262 /* 2263 * Verify that the data was written correctly - check_file() returns 2264 * a non-null pointer which contains an error message if there are 2265 * problems. 2266 * 2267 * For listio, we basically have to make 1 call to check_file for each 2268 * stride. 2269 */ 2270 2271 if (v_opt && lio_req.li_opcode == LO_WRITE) { 2272 fstride = lio->r_filestride ? lio->r_filestride : lio->r_nbytes; 2273 mstride = lio->r_memstride ? lio->r_memstride : lio->r_nbytes; 2274 foffset = lio->r_offset; 2275 2276 if (mstride > 0 || lio->r_nstrides <= 1) { 2277 moffset = addr; 2278 } else { 2279 moffset = addr + mem_needed - lio->r_nbytes; 2280 } 2281 2282 for (i = 0; i < lio_req.li_nstride; i++) { 2283 msg = check_file(lio->r_file, 2284 foffset, lio->r_nbytes, 2285 Pattern, Pattern_Length, 2286 moffset - addr, 2287 lio->r_oflags & O_PARALLEL); 2288 2289 if (msg != NULL) { 2290 doio_fprintf(stderr, "%s\n%s\n", 2291 msg, 2292 format_listio(req, lio->r_cmd, 2293 &lio_req, 1, fd, 2294 Pattern)); 2295 doio_upanic(U_CORRUPTION); 2296 exit(E_COMPARE); 2297 } 2298 2299 moffset += mstride; 2300 foffset += fstride; 2301 } 2302 2303 } 2304 2305 rval = 0; 2306 2307 lio_done: 2308 2309 /* 2310 * General cleanup ... 
2311 * 2312 */ 2313 2314 /* 2315 * Release file locks if necessary 2316 */ 2317 2318 if (got_lock) { 2319 if (lock_file_region(lio->r_file, fd, F_UNLCK, 2320 min_byte, (max_byte - min_byte + 1)) < 0) { 2321 return -1; 2322 } 2323 } 2324 2325 return rval; 2326 #else 2327 return -1; 2328 #endif 2329 } 2330 2331 /* 2332 * perform ssread/sswrite operations 2333 */ 2334 2335 #ifdef _CRAY1 2336 2337 int do_ssdio(struct io_req *req) 2338 { 2339 int nbytes, nb; 2340 char errbuf[BSIZE]; 2341 2342 nbytes = req->r_data.ssread.r_nbytes; 2343 2344 /* 2345 * Grab core and sds space 2346 */ 2347 2348 if ((nb = alloc_mem(nbytes)) < 0) 2349 return nb; 2350 2351 if (alloc_sds(nbytes) == -1) 2352 return -1; 2353 2354 if (req->r_type == SSWRITE) { 2355 2356 /* 2357 * Init data and ship it to the ssd 2358 */ 2359 2360 Pattern[0] = req->r_data.sswrite.r_pattern; 2361 /*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */ 2362 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0); 2363 2364 if (sswrite((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) { 2365 doio_fprintf(stderr, "sswrite() failed: %s (%d)\n%s\n", 2366 SYSERR, errno, 2367 format_sds(req, Memptr, Sdsptr, Pattern)); 2368 doio_upanic(U_RVAL); 2369 return -1; 2370 } 2371 } else { 2372 /* 2373 * read from sds 2374 */ 2375 2376 if (ssread((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) { 2377 doio_fprintf(stderr, "ssread() failed: %s (%d)\n%s\n", 2378 SYSERR, errno, 2379 format_sds(req, Memptr, Sdsptr, Pattern)); 2380 2381 doio_upanic(U_RVAL); 2382 return -1; 2383 } 2384 } 2385 2386 /* 2387 * Verify data if SSWRITE and v_opt 2388 */ 2389 2390 if (v_opt && req->r_type == SSWRITE) { 2391 ssread((long)Memptr, (long)Sdsptr, btoc(nbytes)); 2392 2393 if (pattern_check(Memptr, nbytes, Pattern, Pattern_Length, 0) == 2394 -1) { 2395 doio_fprintf(stderr, 2396 "sds DATA COMPARE ERROR - ABORTING\n%s\n", 2397 format_sds(req, Memptr, Sdsptr, Pattern)); 2398 2399 doio_upanic(U_CORRUPTION); 2400 exit(E_COMPARE); 2401 } 2402 } 2403 
} 2404 2405 #else 2406 2407 #ifdef CRAY 2408 2409 int do_ssdio(struct io_req *req) 2410 { 2411 doio_fprintf(stderr, 2412 "Internal Error - do_ssdio() called on a non-cray1 system\n"); 2413 alloc_mem(-1); 2414 exit(E_INTERNAL); 2415 } 2416 2417 #endif /* CRAY */ 2418 2419 #endif /* _CRAY1 */ 2420 2421 char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd) 2422 { 2423 static char *errbuf = NULL; 2424 char *cp; 2425 struct rw_req *io; 2426 struct smap *aname; 2427 #ifdef CRAY 2428 struct stat sbuf; 2429 #endif 2430 #ifdef sgi 2431 struct dioattr finfo; 2432 #endif 2433 2434 if (errbuf == NULL) 2435 errbuf = malloc(32768); 2436 2437 io = &ioreq->r_data.io; 2438 2439 /* 2440 * Look up async I/O completion strategy 2441 */ 2442 for (aname = aionames; 2443 aname->value != -1 && aname->value != io->r_aio_strat; aname++) ; 2444 2445 cp = errbuf; 2446 cp += sprintf(cp, "Request number %d\n", Reqno); 2447 2448 cp += 2449 sprintf(cp, " fd %d is file %s - open flags are %#o %s\n", 2450 fd, io->r_file, io->r_oflags, format_oflags(io->r_oflags)); 2451 2452 if (sy->sy_flags & SY_WRITE) { 2453 cp += 2454 sprintf(cp, 2455 " write done at file offset %d - pattern is %c (%#o)\n", 2456 io->r_offset, 2457 (io->r_pattern == '\0') ? '?' : io->r_pattern, 2458 io->r_pattern); 2459 } else { 2460 cp += sprintf(cp, " read done at file offset %d\n", 2461 io->r_offset); 2462 } 2463 2464 if (sy->sy_flags & SY_ASYNC) { 2465 cp += 2466 sprintf(cp, 2467 " async io completion strategy is %s\n", 2468 aname->string); 2469 } 2470 2471 cp += 2472 sprintf(cp, 2473 " number of requests is %d, strides per request is %d\n", 2474 io->r_nent, io->r_nstrides); 2475 2476 cp += sprintf(cp, " i/o byte count = %d\n", io->r_nbytes); 2477 2478 cp += sprintf(cp, " memory alignment is %s\n", 2479 (io-> 2480 r_uflags & F_WORD_ALIGNED) ? 
"aligned" : "unaligned"); 2481 2482 #ifdef CRAY 2483 if (io->r_oflags & O_RAW) { 2484 cp += 2485 sprintf(cp, 2486 " RAW I/O: offset %% 4096 = %d length %% 4096 = %d\n", 2487 io->r_offset % 4096, io->r_nbytes % 4096); 2488 fstat(fd, &sbuf); 2489 cp += 2490 sprintf(cp, 2491 " optimal file xfer size: small: %d large: %d\n", 2492 sbuf.st_blksize, sbuf.st_oblksize); 2493 cp += 2494 sprintf(cp, " cblks %d cbits %#o\n", sbuf.st_cblks, 2495 sbuf.st_cbits); 2496 } 2497 #endif 2498 #ifdef sgi 2499 if (io->r_oflags & O_DIRECT) { 2500 2501 if (fcntl(fd, F_DIOINFO, &finfo) == -1) { 2502 cp += 2503 sprintf(cp, 2504 " Error %s (%d) getting direct I/O info\n", 2505 strerror(errno), errno); 2506 finfo.d_mem = 1; 2507 finfo.d_miniosz = 1; 2508 finfo.d_maxiosz = 1; 2509 } 2510 2511 cp += 2512 sprintf(cp, 2513 " DIRECT I/O: offset %% %d = %d length %% %d = %d\n", 2514 finfo.d_miniosz, io->r_offset % finfo.d_miniosz, 2515 io->r_nbytes, io->r_nbytes % finfo.d_miniosz); 2516 cp += 2517 sprintf(cp, 2518 " mem alignment 0x%x xfer size: small: %d large: %d\n", 2519 finfo.d_mem, finfo.d_miniosz, finfo.d_maxiosz); 2520 } 2521 #endif 2522 2523 return (errbuf); 2524 } 2525 2526 /* 2527 * Issue listio requests 2528 */ 2529 #ifdef CRAY 2530 struct status *sy_listio(struct io_req *req, struct syscall_info *sysc, int fd, 2531 char *addr) 2532 { 2533 int offset, nbytes, nstrides, nents, aio_strat; 2534 int aio_id, signo, o, i, lc; 2535 char *a; 2536 struct listreq *lio_req, *l; 2537 struct aio_info *aiop; 2538 struct status *status; 2539 2540 /* 2541 * Initialize common fields - assumes r_oflags, r_file, r_offset, and 2542 * r_nbytes are at the same offset in the read_req and reada_req 2543 * structures. 2544 */ 2545 offset = req->r_data.io.r_offset; 2546 nbytes = req->r_data.io.r_nbytes; 2547 nstrides = req->r_data.io.r_nstrides; 2548 nents = req->r_data.io.r_nent; 2549 aio_strat = req->r_data.io.r_aio_strat; 2550 2551 lc = (sysc->sy_flags & SY_ASYNC) ? 
LC_START : LC_WAIT; 2552 2553 status = malloc(sizeof(struct status)); 2554 if (status == NULL) { 2555 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2556 __FILE__, __LINE__); 2557 return NULL; 2558 } 2559 status->aioid = malloc((nents + 1) * sizeof(int)); 2560 if (status->aioid == NULL) { 2561 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2562 __FILE__, __LINE__); 2563 return NULL; 2564 } 2565 2566 signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0; 2567 2568 lio_req = malloc(nents * sizeof(struct listreq)); 2569 if (lio_req == NULL) { 2570 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2571 __FILE__, __LINE__); 2572 return NULL; 2573 } 2574 for (l = lio_req, a = addr, o = offset, i = 0; 2575 i < nents; l++, a += nbytes, o += nbytes, i++) { 2576 2577 aio_id = aio_register(fd, aio_strat, signo); 2578 aiop = aio_slot(aio_id); 2579 status->aioid[i] = aio_id; 2580 2581 l->li_opcode = (sysc->sy_flags & SY_WRITE) ? LO_WRITE : LO_READ; 2582 l->li_offset = o; 2583 l->li_fildes = fd; 2584 l->li_buf = a; 2585 l->li_nbyte = nbytes; 2586 l->li_status = &aiop->iosw; 2587 l->li_signo = signo; 2588 l->li_nstride = nstrides; 2589 l->li_filstride = 0; 2590 l->li_memstride = 0; 2591 l->li_drvr = 0; 2592 l->li_flags = LF_LSEEK; 2593 } 2594 2595 status->aioid[nents] = -1; /* end sentinel */ 2596 2597 if ((status->rval = listio(lc, lio_req, nents)) == -1) { 2598 status->err = errno; 2599 } 2600 2601 free(lio_req); 2602 return (status); 2603 } 2604 2605 /* 2606 * Calculate the size of a request in bytes and min/max boundaries 2607 * 2608 * This assumes filestride & memstride = 0. 
2609 */ 2610 int listio_mem(struct io_req *req, int offset, int fmstride, int *min, int *max) 2611 { 2612 int i, size; 2613 2614 size = stride_bounds(offset, fmstride, 2615 req->r_data.io.r_nstrides * req->r_data.io.r_nent, 2616 req->r_data.io.r_nbytes, min, max); 2617 return (size); 2618 } 2619 2620 char *fmt_listio(struct io_req *req, struct syscall_info *sy, int fd, 2621 char *addr) 2622 { 2623 static char *errbuf = NULL; 2624 char *cp; 2625 char *c, *opcode; 2626 int i; 2627 2628 if (errbuf == NULL) { 2629 errbuf = malloc(32768); 2630 if (errbuf == NULL) { 2631 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2632 __FILE__, __LINE__); 2633 return NULL; 2634 } 2635 } 2636 2637 c = (sy->sy_flags & SY_ASYNC) ? "lc_wait" : "lc_start"; 2638 2639 cp = errbuf; 2640 cp += sprintf(cp, "syscall: listio(%s, (?), %d)\n", 2641 c, req->r_data.io.r_nent); 2642 2643 cp += sprintf(cp, " data buffer at %#o\n", addr); 2644 2645 return (errbuf); 2646 } 2647 #endif /* CRAY */ 2648 2649 #ifdef sgi 2650 struct status *sy_pread(struct io_req *req, struct syscall_info *sysc, int fd, 2651 char *addr) 2652 { 2653 int rc; 2654 struct status *status; 2655 2656 rc = pread(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset); 2657 2658 status = malloc(sizeof(struct status)); 2659 if (status == NULL) { 2660 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2661 __FILE__, __LINE__); 2662 return NULL; 2663 } 2664 status->aioid = NULL; 2665 status->rval = rc; 2666 status->err = errno; 2667 2668 return (status); 2669 } 2670 2671 struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc, int fd, 2672 char *addr) 2673 { 2674 int rc; 2675 struct status *status; 2676 2677 rc = pwrite(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset); 2678 2679 status = malloc(sizeof(struct status)); 2680 if (status == NULL) { 2681 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2682 __FILE__, __LINE__); 2683 return NULL; 2684 } 2685 status->aioid = NULL; 2686 status->rval = rc; 2687 
status->err = errno; 2688 2689 return (status); 2690 } 2691 2692 char *fmt_pread(struct io_req *req, struct syscall_info *sy, int fd, char *addr) 2693 { 2694 static char *errbuf = NULL; 2695 char *cp; 2696 2697 if (errbuf == NULL) { 2698 errbuf = malloc(32768); 2699 if (errbuf == NULL) { 2700 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2701 __FILE__, __LINE__); 2702 return NULL; 2703 } 2704 } 2705 2706 cp = errbuf; 2707 cp += sprintf(cp, "syscall: %s(%d, 0x%lx, %d)\n", 2708 sy->sy_name, fd, addr, req->r_data.io.r_nbytes); 2709 return (errbuf); 2710 } 2711 #endif /* sgi */ 2712 2713 #ifndef CRAY 2714 struct status *sy_readv(struct io_req *req, struct syscall_info *sysc, int fd, 2715 char *addr) 2716 { 2717 struct status *sy_rwv(); 2718 return sy_rwv(req, sysc, fd, addr, 0); 2719 } 2720 2721 struct status *sy_writev(struct io_req *req, struct syscall_info *sysc, int fd, 2722 char *addr) 2723 { 2724 struct status *sy_rwv(); 2725 return sy_rwv(req, sysc, fd, addr, 1); 2726 } 2727 2728 struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc, int fd, 2729 char *addr, int rw) 2730 { 2731 int rc; 2732 struct status *status; 2733 struct iovec iov[2]; 2734 2735 status = malloc(sizeof(struct status)); 2736 if (status == NULL) { 2737 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2738 __FILE__, __LINE__); 2739 return NULL; 2740 } 2741 status->aioid = NULL; 2742 2743 /* move to the desired file position. 
*/ 2744 if ((rc = lseek(fd, req->r_data.io.r_offset, SEEK_SET)) == -1) { 2745 status->rval = rc; 2746 status->err = errno; 2747 return (status); 2748 } 2749 2750 iov[0].iov_base = addr; 2751 iov[0].iov_len = req->r_data.io.r_nbytes; 2752 2753 if (rw) 2754 rc = writev(fd, iov, 1); 2755 else 2756 rc = readv(fd, iov, 1); 2757 status->aioid = NULL; 2758 status->rval = rc; 2759 status->err = errno; 2760 return (status); 2761 } 2762 2763 char *fmt_readv(struct io_req *req, struct syscall_info *sy, int fd, char *addr) 2764 { 2765 static char errbuf[32768]; 2766 char *cp; 2767 2768 cp = errbuf; 2769 cp += sprintf(cp, "syscall: %s(%d, (iov on stack), 1)\n", 2770 sy->sy_name, fd); 2771 return (errbuf); 2772 } 2773 #endif /* !CRAY */ 2774 2775 #ifdef sgi 2776 struct status *sy_aread(struct io_req *req, struct syscall_info *sysc, int fd, 2777 char *addr) 2778 { 2779 struct status *sy_arw(); 2780 return sy_arw(req, sysc, fd, addr, 0); 2781 } 2782 2783 struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc, int fd, 2784 char *addr) 2785 { 2786 struct status *sy_arw(); 2787 return sy_arw(req, sysc, fd, addr, 1); 2788 } 2789 2790 /* 2791 #define sy_aread(A, B, C, D) sy_arw(A, B, C, D, 0) 2792 #define sy_awrite(A, B, C, D) sy_arw(A, B, C, D, 1) 2793 */ 2794 2795 struct status *sy_arw(struct io_req *req, struct syscall_info *sysc, int fd, 2796 char *addr, int rw) 2797 { 2798 /* POSIX 1003.1b-1993 Async read */ 2799 struct status *status; 2800 int rc; 2801 int aio_id, aio_strat, signo; 2802 struct aio_info *aiop; 2803 2804 status = malloc(sizeof(struct status)); 2805 if (status == NULL) { 2806 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2807 __FILE__, __LINE__); 2808 return NULL; 2809 } 2810 aio_strat = req->r_data.io.r_aio_strat; 2811 signo = (aio_strat == A_SIGNAL) ? 
SIGUSR1 : 0; 2812 2813 aio_id = aio_register(fd, aio_strat, signo); 2814 aiop = aio_slot(aio_id); 2815 2816 memset((void *)&aiop->aiocb, 0, sizeof(aiocb_t)); 2817 2818 aiop->aiocb.aio_fildes = fd; 2819 aiop->aiocb.aio_nbytes = req->r_data.io.r_nbytes; 2820 aiop->aiocb.aio_offset = req->r_data.io.r_offset; 2821 aiop->aiocb.aio_buf = addr; 2822 aiop->aiocb.aio_reqprio = 0; /* must be 0 */ 2823 aiop->aiocb.aio_lio_opcode = 0; 2824 2825 if (aio_strat == A_SIGNAL) { /* siginfo(2) stuff */ 2826 aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; 2827 aiop->aiocb.aio_sigevent.sigev_signo = signo; 2828 } else if (aio_strat == A_CALLBACK) { 2829 aiop->aiocb.aio_sigevent.sigev_signo = 0; 2830 aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_CALLBACK; 2831 aiop->aiocb.aio_sigevent.sigev_func = cb_handler; 2832 aiop->aiocb.aio_sigevent.sigev_value.sival_int = aio_id; 2833 } else { 2834 aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_NONE; 2835 aiop->aiocb.aio_sigevent.sigev_signo = 0; 2836 } 2837 2838 if (rw) 2839 rc = aio_write(&aiop->aiocb); 2840 else 2841 rc = aio_read(&aiop->aiocb); 2842 2843 status->aioid = malloc(2 * sizeof(int)); 2844 if (status->aioid == NULL) { 2845 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2846 __FILE__, __LINE__); 2847 return NULL; 2848 } 2849 status->aioid[0] = aio_id; 2850 status->aioid[1] = -1; 2851 status->rval = rc; 2852 status->err = errno; 2853 return (status); 2854 } 2855 2856 char *fmt_aread(struct io_req *req, struct syscall_info *sy, int fd, char *addr) 2857 { 2858 static char errbuf[32768]; 2859 char *cp; 2860 2861 cp = errbuf; 2862 cp += sprintf(cp, "syscall: %s(&aiop->aiocb)\n", sy->sy_name); 2863 return (errbuf); 2864 } 2865 #endif /* sgi */ 2866 2867 #ifndef CRAY 2868 2869 struct status *sy_mmread(struct io_req *req, struct syscall_info *sysc, int fd, 2870 char *addr) 2871 { 2872 struct status *sy_mmrw(); 2873 return sy_mmrw(req, sysc, fd, addr, 0); 2874 } 2875 2876 struct status *sy_mmwrite(struct io_req *req, struct syscall_info 
*sysc, int fd, 2877 char *addr) 2878 { 2879 struct status *sy_mmrw(); 2880 return sy_mmrw(req, sysc, fd, addr, 1); 2881 } 2882 2883 struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc, int fd, 2884 char *addr, int rw) 2885 { 2886 /* 2887 * mmap read/write 2888 * This version is oriented towards mmaping the file to memory 2889 * ONCE and keeping it mapped. 2890 */ 2891 struct status *status; 2892 void *mrc = NULL, *memaddr = NULL; 2893 struct fd_cache *fdc; 2894 struct stat sbuf; 2895 int rc; 2896 2897 status = malloc(sizeof(struct status)); 2898 if (status == NULL) { 2899 doio_fprintf(stderr, "malloc failed, %s/%d\n", 2900 __FILE__, __LINE__); 2901 return NULL; 2902 } 2903 status->aioid = NULL; 2904 status->rval = -1; 2905 2906 fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags); 2907 2908 if (v_opt || fdc->c_memaddr == NULL) { 2909 if (fstat(fd, &sbuf) < 0) { 2910 doio_fprintf(stderr, "fstat failed, errno=%d\n", errno); 2911 status->err = errno; 2912 return (status); 2913 } 2914 2915 fdc->c_memlen = (int)sbuf.st_size; 2916 mrc = mmap(NULL, (int)sbuf.st_size, 2917 rw ? 
PROT_WRITE | PROT_READ : PROT_READ, 2918 MAP_SHARED, fd, 0); 2919 2920 if (mrc == MAP_FAILED) { 2921 doio_fprintf(stderr, "mmap() failed - 0x%lx %d\n", 2922 mrc, errno); 2923 status->err = errno; 2924 return (status); 2925 } 2926 2927 fdc->c_memaddr = mrc; 2928 } 2929 2930 memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset); 2931 2932 active_mmap_rw = 1; 2933 if (rw) 2934 memcpy(memaddr, addr, req->r_data.io.r_nbytes); 2935 else 2936 memcpy(addr, memaddr, req->r_data.io.r_nbytes); 2937 if (v_opt) 2938 msync(fdc->c_memaddr, (int)sbuf.st_size, MS_SYNC); 2939 active_mmap_rw = 0; 2940 2941 status->rval = req->r_data.io.r_nbytes; 2942 status->err = 0; 2943 2944 if (v_opt) { 2945 rc = munmap(mrc, (int)sbuf.st_size); 2946 } 2947 2948 return (status); 2949 } 2950 2951 char *fmt_mmrw(struct io_req *req, struct syscall_info *sy, int fd, char *addr) 2952 { 2953 static char errbuf[32768]; 2954 char *cp; 2955 struct fd_cache *fdc; 2956 void *memaddr; 2957 2958 fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags); 2959 2960 cp = errbuf; 2961 cp += sprintf(cp, "syscall: %s(NULL, %d, %s, MAP_SHARED, %d, 0)\n", 2962 sy->sy_name, 2963 fdc->c_memlen, 2964 (sy->sy_flags & SY_WRITE) ? 
"PROT_WRITE" : "PROT_READ", 2965 fd); 2966 2967 cp += sprintf(cp, "\tfile is mmaped to: 0x%lx\n", 2968 (unsigned long)fdc->c_memaddr); 2969 2970 memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset); 2971 2972 cp += sprintf(cp, "\tfile-mem=0x%lx, length=%d, buffer=0x%lx\n", 2973 (unsigned long)memaddr, req->r_data.io.r_nbytes, 2974 (unsigned long)addr); 2975 2976 return (errbuf); 2977 } 2978 #endif /* !CRAY */ 2979 2980 struct syscall_info syscalls[] = { 2981 #ifdef CRAY 2982 {"listio-read-sync", LREAD, 2983 sy_listio, NULL, fmt_listio, 2984 SY_IOSW}, 2985 {"listio-read-strides-sync", LSREAD, 2986 sy_listio, listio_mem, fmt_listio, 2987 SY_IOSW}, 2988 {"listio-read-reqs-sync", LEREAD, 2989 sy_listio, listio_mem, fmt_listio, 2990 SY_IOSW}, 2991 {"listio-read-async", LREADA, 2992 sy_listio, NULL, fmt_listio, 2993 SY_IOSW | SY_ASYNC}, 2994 {"listio-read-strides-async", LSREADA, 2995 sy_listio, listio_mem, fmt_listio, 2996 SY_IOSW | SY_ASYNC}, 2997 {"listio-read-reqs-async", LEREADA, 2998 sy_listio, listio_mem, fmt_listio, 2999 SY_IOSW | SY_ASYNC}, 3000 {"listio-write-sync", LWRITE, 3001 sy_listio, listio_mem, fmt_listio, 3002 SY_IOSW | SY_WRITE}, 3003 {"listio-write-strides-sync", LSWRITE, 3004 sy_listio, listio_mem, fmt_listio, 3005 SY_IOSW | SY_WRITE}, 3006 {"listio-write-reqs-sync", LEWRITE, 3007 sy_listio, listio_mem, fmt_listio, 3008 SY_IOSW | SY_WRITE}, 3009 {"listio-write-async", LWRITEA, 3010 sy_listio, listio_mem, fmt_listio, 3011 SY_IOSW | SY_WRITE | SY_ASYNC}, 3012 {"listio-write-strides-async", LSWRITEA, 3013 sy_listio, listio_mem, fmt_listio, 3014 SY_IOSW | SY_WRITE | SY_ASYNC}, 3015 {"listio-write-reqs-async", LEWRITEA, 3016 sy_listio, listio_mem, fmt_listio, 3017 SY_IOSW | SY_WRITE | SY_ASYNC}, 3018 #endif 3019 3020 #ifdef sgi 3021 {"aread", AREAD, 3022 sy_aread, NULL, fmt_aread, 3023 SY_IOSW | SY_ASYNC}, 3024 {"awrite", AWRITE, 3025 sy_awrite, NULL, fmt_aread, 3026 SY_IOSW | SY_WRITE | SY_ASYNC}, 3027 {"pread", PREAD, 3028 sy_pread, 
NULL, fmt_pread, 3029 0}, 3030 {"pwrite", PWRITE, 3031 sy_pwrite, NULL, fmt_pread, 3032 SY_WRITE}, 3033 #endif 3034 3035 #ifndef CRAY 3036 {"readv", READV, 3037 sy_readv, NULL, fmt_readv, 3038 0}, 3039 {"writev", WRITEV, 3040 sy_writev, NULL, fmt_readv, 3041 SY_WRITE}, 3042 {"mmap-read", MMAPR, 3043 sy_mmread, NULL, fmt_mmrw, 3044 0}, 3045 {"mmap-write", MMAPW, 3046 sy_mmwrite, NULL, fmt_mmrw, 3047 SY_WRITE}, 3048 #endif 3049 3050 {NULL, 0, 3051 0, 0, 0, 3052 0}, 3053 }; 3054 3055 int do_rw(struct io_req *req) 3056 { 3057 static int pid = -1; 3058 int fd, offset, nbytes, nstrides, nents, oflags; 3059 int rval, mem_needed, i; 3060 int logged_write, got_lock, pattern; 3061 off_t woffset; 3062 int min_byte, max_byte; 3063 char *addr, *file, *msg; 3064 struct status *s; 3065 struct wlog_rec wrec; 3066 struct syscall_info *sy; 3067 #if defined(CRAY) || defined(sgi) 3068 struct aio_info *aiop; 3069 struct iosw *iosw; 3070 #endif 3071 #ifdef sgi 3072 struct fd_cache *fdc; 3073 #endif 3074 3075 woffset = 0; 3076 3077 /* 3078 * Initialize common fields - assumes r_oflags, r_file, r_offset, and 3079 * r_nbytes are at the same offset in the read_req and reada_req 3080 * structures. 3081 */ 3082 file = req->r_data.io.r_file; 3083 oflags = req->r_data.io.r_oflags; 3084 offset = req->r_data.io.r_offset; 3085 nbytes = req->r_data.io.r_nbytes; 3086 nstrides = req->r_data.io.r_nstrides; 3087 nents = req->r_data.io.r_nent; 3088 pattern = req->r_data.io.r_pattern; 3089 3090 if (nents >= MAX_AIO) { 3091 doio_fprintf(stderr, 3092 "do_rw: too many list requests, %d. 
Maximum is %d\n", 3093 nents, MAX_AIO); 3094 return (-1); 3095 } 3096 3097 /* 3098 * look up system call info 3099 */ 3100 for (sy = syscalls; sy->sy_name != NULL && sy->sy_type != req->r_type; 3101 sy++) ; 3102 3103 if (sy->sy_name == NULL) { 3104 doio_fprintf(stderr, "do_rw: unknown r_type %d.\n", 3105 req->r_type); 3106 return (-1); 3107 } 3108 3109 /* 3110 * Get an open file descriptor 3111 * Note: must be done before memory allocation so that the direct i/o 3112 * information is available in mem. allocate 3113 */ 3114 3115 if ((fd = alloc_fd(file, oflags)) == -1) 3116 return -1; 3117 3118 /* 3119 * Allocate core memory and possibly sds space. Initialize the 3120 * data to be written. Make sure we get enough, based on the 3121 * memstride. 3122 * 3123 * need: 3124 * 1 extra word for possible partial-word address "bump" 3125 * 1 extra word for dynamic pattern overrun 3126 * MPP_BUMP extra words for T3E non-hw-aligned memory address. 3127 */ 3128 3129 if (sy->sy_buffer != NULL) { 3130 mem_needed = (*sy->sy_buffer) (req, 0, 0, NULL, NULL); 3131 } else { 3132 mem_needed = nbytes; 3133 } 3134 3135 #ifdef CRAY 3136 if ((rval = 3137 alloc_mem(mem_needed + wtob(1) * 2 + 3138 MPP_BUMP * sizeof(UINT64_T))) < 0) { 3139 return rval; 3140 } 3141 #else 3142 #ifdef sgi 3143 /* get memory alignment for using DIRECT I/O */ 3144 fdc = alloc_fdcache(file, oflags); 3145 3146 if ((rval = alloc_mem(mem_needed + wtob(1) * 2 + fdc->c_memalign)) < 0) { 3147 return rval; 3148 } 3149 #else 3150 /* what is !CRAY && !sgi ? 
*/ 3151 if ((rval = alloc_mem(mem_needed + wtob(1) * 2)) < 0) { 3152 return rval; 3153 } 3154 #endif /* sgi */ 3155 #endif /* CRAY */ 3156 3157 Pattern[0] = pattern; 3158 3159 /* 3160 * Allocate SDS space for backdoor write if desired 3161 */ 3162 3163 if (oflags & O_SSD) { 3164 #ifdef CRAY 3165 #ifndef _CRAYMPP 3166 if (alloc_sds(nbytes) == -1) 3167 return -1; 3168 3169 if (sy->sy_flags & SY_WRITE) { 3170 /*pattern_fill(Memptr, mem_needed, Pattern, Pattern_Length, 0); */ 3171 (*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 3172 0); 3173 3174 if (sswrite((long)Memptr, Sdsptr, btoc(mem_needed)) == 3175 -1) { 3176 doio_fprintf(stderr, 3177 "sswrite(%d, %d, %d) failed: %s (%d)\n", 3178 (long)Memptr, Sdsptr, 3179 btoc(mem_needed), SYSERR, errno); 3180 fflush(stderr); 3181 return -1; 3182 } 3183 } 3184 3185 addr = (char *)Sdsptr; 3186 #else 3187 doio_fprintf(stderr, 3188 "Invalid O_SSD flag was generated for MPP system\n"); 3189 fflush(stderr); 3190 return -1; 3191 #endif /* _CRAYMPP */ 3192 #else /* CRAY */ 3193 doio_fprintf(stderr, 3194 "Invalid O_SSD flag was generated for non-Cray system\n"); 3195 fflush(stderr); 3196 return -1; 3197 #endif /* CRAY */ 3198 } else { 3199 addr = Memptr; 3200 3201 /* 3202 * if io is not raw, bump the offset by a random amount 3203 * to generate non-word-aligned io. 3204 * 3205 * On MPP systems, raw I/O must start on an 0x80 byte boundary. 3206 * For non-aligned I/O, bump the address from 1 to 8 words. 3207 */ 3208 3209 if (!(req->r_data.io.r_uflags & F_WORD_ALIGNED)) { 3210 #ifdef _CRAYMPP 3211 addr += 3212 random_range(0, MPP_BUMP, 1, NULL) * sizeof(int); 3213 #endif 3214 addr += random_range(0, wtob(1) - 1, 1, NULL); 3215 } 3216 #ifdef sgi 3217 /* 3218 * Force memory alignment for Direct I/O 3219 */ 3220 if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) { 3221 addr += 3222 fdc->c_memalign - ((long)addr % fdc->c_memalign); 3223 } 3224 #endif 3225 3226 /* 3227 * FILL must be done on a word-aligned buffer. 
3228 * Call the fill function with Memptr which is aligned, 3229 * then memmove it to the right place. 3230 */ 3231 if (sy->sy_flags & SY_WRITE) { 3232 (*Data_Fill) (Memptr, mem_needed, Pattern, 3233 Pattern_Length, 0); 3234 if (addr != Memptr) 3235 memmove(addr, Memptr, mem_needed); 3236 } 3237 } 3238 3239 rval = 0; 3240 got_lock = 0; 3241 logged_write = 0; 3242 3243 /* 3244 * Lock data if this is a write and locking option is set 3245 */ 3246 if (sy->sy_flags & SY_WRITE && k_opt) { 3247 if (sy->sy_buffer != NULL) { 3248 (*sy->sy_buffer) (req, offset, 0, &min_byte, &max_byte); 3249 } else { 3250 min_byte = offset; 3251 max_byte = offset + (nbytes * nstrides * nents); 3252 } 3253 3254 if (lock_file_region(file, fd, F_WRLCK, 3255 min_byte, (max_byte - min_byte + 1)) < 0) { 3256 doio_fprintf(stderr, 3257 "file lock failed:\n%s\n", 3258 fmt_ioreq(req, sy, fd)); 3259 doio_fprintf(stderr, 3260 " buffer(req, %d, 0, 0x%x, 0x%x)\n", 3261 offset, min_byte, max_byte); 3262 alloc_mem(-1); 3263 exit(E_INTERNAL); 3264 } 3265 3266 got_lock = 1; 3267 } 3268 3269 /* 3270 * Write a preliminary write-log entry. This is done so that 3271 * doio_check can do corruption detection across an interrupt/crash. 3272 * Note that w_done is set to 0. If doio_check sees this, it 3273 * re-creates the file extents as if the write completed, but does not 3274 * do any checking - see comments in doio_check for more details. 3275 */ 3276 3277 if (sy->sy_flags & SY_WRITE && w_opt) { 3278 if (pid == -1) { 3279 pid = getpid(); 3280 } 3281 3282 wrec.w_async = (sy->sy_flags & SY_ASYNC) ? 
1 : 0; 3283 wrec.w_oflags = oflags; 3284 wrec.w_pid = pid; 3285 wrec.w_offset = offset; 3286 wrec.w_nbytes = nbytes; /* mem_needed -- total length */ 3287 3288 wrec.w_pathlen = strlen(file); 3289 memcpy(wrec.w_path, file, wrec.w_pathlen); 3290 wrec.w_hostlen = strlen(Host); 3291 memcpy(wrec.w_host, Host, wrec.w_hostlen); 3292 wrec.w_patternlen = Pattern_Length; 3293 memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen); 3294 3295 wrec.w_done = 0; 3296 3297 if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) { 3298 doio_fprintf(stderr, 3299 "Could not append to write-log: %s (%d)\n", 3300 SYSERR, errno); 3301 } else { 3302 logged_write = 1; 3303 } 3304 } 3305 3306 s = (*sy->sy_syscall) (req, sy, fd, addr); 3307 3308 if (s->rval == -1) { 3309 doio_fprintf(stderr, 3310 "%s() request failed: %s (%d)\n%s\n%s\n", 3311 sy->sy_name, SYSERR, errno, 3312 fmt_ioreq(req, sy, fd), 3313 (*sy->sy_format) (req, sy, fd, addr)); 3314 3315 doio_upanic(U_RVAL); 3316 3317 for (i = 0; i < nents; i++) { 3318 if (s->aioid == NULL) 3319 break; 3320 aio_unregister(s->aioid[i]); 3321 } 3322 rval = -1; 3323 } else { 3324 /* 3325 * If the syscall was async, wait for I/O to complete 3326 */ 3327 #ifndef __linux__ 3328 if (sy->sy_flags & SY_ASYNC) { 3329 for (i = 0; i < nents; i++) { 3330 aio_wait(s->aioid[i]); 3331 } 3332 } 3333 #endif 3334 3335 /* 3336 * Check the syscall how-much-data-written return. Look 3337 * for this in either the return value or the 'iosw' 3338 * structure. 3339 */ 3340 3341 if (sy->sy_flags & SY_IOSW) { 3342 #ifdef CRAY 3343 for (i = 0; i < nents; i++) { 3344 if (s->aioid == NULL) 3345 break; /* >>> error condition? 
*/ 3346 aiop = aio_slot(s->aioid[i]); 3347 iosw = &aiop->iosw; 3348 if (iosw->sw_error != 0) { 3349 doio_fprintf(stderr, 3350 "%s() iosw error set: %s\n%s\n%s\n", 3351 sy->sy_name, 3352 strerror(iosw->sw_error), 3353 fmt_ioreq(req, sy, fd), 3354 (*sy->sy_format) (req, sy, 3355 fd, 3356 addr)); 3357 doio_upanic(U_IOSW); 3358 rval = -1; 3359 } else if (iosw->sw_count != nbytes * nstrides) { 3360 doio_fprintf(stderr, 3361 "Bad iosw from %s() #%d\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n%s\n", 3362 sy->sy_name, i, 3363 1, 0, nbytes * nstrides, 3364 iosw->sw_flag, 3365 iosw->sw_error, 3366 iosw->sw_count, 3367 fmt_ioreq(req, sy, fd), 3368 (*sy->sy_format) (req, sy, 3369 fd, 3370 addr)); 3371 doio_upanic(U_IOSW); 3372 rval = -1; 3373 } 3374 3375 aio_unregister(s->aioid[i]); 3376 } 3377 #endif /* CRAY */ 3378 #ifdef sgi 3379 for (i = 0; s->aioid[i] != -1; i++) { 3380 if (s->aioid == NULL) { 3381 doio_fprintf(stderr, 3382 "aioid == NULL!\n"); 3383 break; 3384 } 3385 aiop = aio_slot(s->aioid[i]); 3386 3387 /* 3388 * make sure the io completed without error 3389 */ 3390 if (aiop->aio_errno != 0) { 3391 doio_fprintf(stderr, 3392 "%s() aio error set: %s (%d)\n%s\n%s\n", 3393 sy->sy_name, 3394 strerror(aiop->aio_errno), 3395 aiop->aio_errno, 3396 fmt_ioreq(req, sy, fd), 3397 (*sy->sy_format) (req, sy, 3398 fd, 3399 addr)); 3400 doio_upanic(U_IOSW); 3401 rval = -1; 3402 } else if (aiop->aio_ret != nbytes) { 3403 doio_fprintf(stderr, 3404 "Bad aio return from %s() #%d\nExpected (%d,%d), got (%d,%d)\n%s\n%s\n", 3405 sy->sy_name, i, 3406 0, nbytes, 3407 aiop->aio_errno, 3408 aiop->aio_ret, 3409 fmt_ioreq(req, sy, fd), 3410 (*sy->sy_format) (req, sy, 3411 fd, 3412 addr)); 3413 aio_unregister(s->aioid[i]); 3414 doio_upanic(U_IOSW); 3415 return -1; 3416 } else { 3417 aio_unregister(s->aioid[i]); 3418 rval = 0; 3419 } 3420 } 3421 #endif /* sgi */ 3422 } else { 3423 3424 if (s->rval != mem_needed) { 3425 doio_fprintf(stderr, 3426 "%s() request returned wrong # of bytes - expected %d, 
got %d\n%s\n%s\n", 3427 sy->sy_name, nbytes, s->rval, 3428 fmt_ioreq(req, sy, fd), 3429 (*sy->sy_format) (req, sy, fd, 3430 addr)); 3431 rval = -1; 3432 doio_upanic(U_RVAL); 3433 } 3434 } 3435 } 3436 3437 /* 3438 * Verify that the data was written correctly - check_file() returns 3439 * a non-null pointer which contains an error message if there are 3440 * problems. 3441 */ 3442 3443 if (rval == 0 && sy->sy_flags & SY_WRITE && v_opt) { 3444 msg = check_file(file, offset, nbytes * nstrides * nents, 3445 Pattern, Pattern_Length, 0, 3446 oflags & O_PARALLEL); 3447 if (msg != NULL) { 3448 doio_fprintf(stderr, "%s\n%s\n%s\n", 3449 msg, 3450 fmt_ioreq(req, sy, fd), 3451 (*sy->sy_format) (req, sy, fd, addr)); 3452 doio_upanic(U_CORRUPTION); 3453 exit(E_COMPARE); 3454 } 3455 } 3456 3457 /* 3458 * General cleanup ... 3459 * 3460 * Write extent information to the write-log, so that doio_check can do 3461 * corruption detection. Note that w_done is set to 1, indicating that 3462 * the write has been verified as complete. We don't need to write the 3463 * filename on the second logging. 3464 */ 3465 3466 if (w_opt && logged_write) { 3467 wrec.w_done = 1; 3468 wlog_record_write(&Wlog, &wrec, woffset); 3469 } 3470 3471 /* 3472 * Unlock file region if necessary 3473 */ 3474 3475 if (got_lock) { 3476 if (lock_file_region(file, fd, F_UNLCK, 3477 min_byte, (max_byte - min_byte + 1)) < 0) { 3478 alloc_mem(-1); 3479 exit(E_INTERNAL); 3480 } 3481 } 3482 3483 if (s->aioid != NULL) 3484 free(s->aioid); 3485 free(s); 3486 return (rval == -1) ? 
-1 : 0; 3487 } 3488 3489 /* 3490 * fcntl-based requests 3491 * - F_FRESVSP 3492 * - F_UNRESVSP 3493 * - F_FSYNC 3494 */ 3495 #ifdef sgi 3496 int do_fcntl(struct io_req *req) 3497 { 3498 int fd, oflags, offset, nbytes; 3499 int rval, op; 3500 int got_lock; 3501 int min_byte, max_byte; 3502 char *file, *msg; 3503 struct flock flk; 3504 3505 /* 3506 * Initialize common fields - assumes r_oflags, r_file, r_offset, and 3507 * r_nbytes are at the same offset in the read_req and reada_req 3508 * structures. 3509 */ 3510 file = req->r_data.io.r_file; 3511 oflags = req->r_data.io.r_oflags; 3512 offset = req->r_data.io.r_offset; 3513 nbytes = req->r_data.io.r_nbytes; 3514 3515 flk.l_type = 0; 3516 flk.l_whence = SEEK_SET; 3517 flk.l_start = offset; 3518 flk.l_len = nbytes; 3519 3520 /* 3521 * Get an open file descriptor 3522 */ 3523 3524 if ((fd = alloc_fd(file, oflags)) == -1) 3525 return -1; 3526 3527 rval = 0; 3528 got_lock = 0; 3529 3530 /* 3531 * Lock data if this is locking option is set 3532 */ 3533 if (k_opt) { 3534 min_byte = offset; 3535 max_byte = offset + nbytes; 3536 3537 if (lock_file_region(file, fd, F_WRLCK, 3538 min_byte, (nbytes + 1)) < 0) { 3539 doio_fprintf(stderr, "file lock failed:\n"); 3540 doio_fprintf(stderr, 3541 " buffer(req, %d, 0, 0x%x, 0x%x)\n", 3542 offset, min_byte, max_byte); 3543 alloc_mem(-1); 3544 exit(E_INTERNAL); 3545 } 3546 3547 got_lock = 1; 3548 } 3549 3550 switch (req->r_type) { 3551 case RESVSP: 3552 op = F_RESVSP; 3553 msg = "f_resvsp"; 3554 break; 3555 case UNRESVSP: 3556 op = F_UNRESVSP; 3557 msg = "f_unresvsp"; 3558 break; 3559 #ifdef F_FSYNC 3560 case DFFSYNC: 3561 op = F_FSYNC; 3562 msg = "f_fsync"; 3563 break; 3564 #endif 3565 } 3566 3567 rval = fcntl(fd, op, &flk); 3568 3569 if (rval == -1) { 3570 doio_fprintf(stderr, 3571 "fcntl %s request failed: %s (%d)\n\tfcntl(%d, %s %d, {%d %lld ==> %lld}\n", 3572 msg, SYSERR, errno, 3573 fd, msg, op, flk.l_whence, 3574 (long long)flk.l_start, (long long)flk.l_len); 3575 3576 
doio_upanic(U_RVAL); 3577 rval = -1; 3578 } 3579 3580 /* 3581 * Unlock file region if necessary 3582 */ 3583 3584 if (got_lock) { 3585 if (lock_file_region(file, fd, F_UNLCK, 3586 min_byte, (max_byte - min_byte + 1)) < 0) { 3587 alloc_mem(-1); 3588 exit(E_INTERNAL); 3589 } 3590 } 3591 3592 return (rval == -1) ? -1 : 0; 3593 } 3594 #endif /* sgi */ 3595 3596 /* 3597 * fsync(2) and fdatasync(2) 3598 */ 3599 #ifndef CRAY 3600 int do_sync(struct io_req *req) 3601 { 3602 int fd, oflags; 3603 int rval; 3604 char *file; 3605 3606 /* 3607 * Initialize common fields - assumes r_oflags, r_file, r_offset, and 3608 * r_nbytes are at the same offset in the read_req and reada_req 3609 * structures. 3610 */ 3611 file = req->r_data.io.r_file; 3612 oflags = req->r_data.io.r_oflags; 3613 3614 /* 3615 * Get an open file descriptor 3616 */ 3617 3618 if ((fd = alloc_fd(file, oflags)) == -1) 3619 return -1; 3620 3621 rval = 0; 3622 switch (req->r_type) { 3623 case FSYNC2: 3624 rval = fsync(fd); 3625 break; 3626 case FDATASYNC: 3627 rval = fdatasync(fd); 3628 break; 3629 default: 3630 rval = -1; 3631 } 3632 return (rval == -1) ? 
-1 : 0; 3633 } 3634 #endif /* !CRAY */ 3635 3636 int 3637 doio_pat_fill(char *addr, int mem_needed, char *Pattern, int Pattern_Length, 3638 int shift) 3639 { 3640 return pattern_fill(addr, mem_needed, Pattern, Pattern_Length, 0); 3641 } 3642 3643 char *doio_pat_check(char *buf, int offset, int length, char *pattern, 3644 int pattern_length, int patshift) 3645 { 3646 static char errbuf[4096]; 3647 int nb, i, pattern_index; 3648 char *cp, *bufend, *ep; 3649 char actual[33], expected[33]; 3650 3651 if (pattern_check(buf, length, pattern, pattern_length, patshift) != 0) { 3652 ep = errbuf; 3653 ep += 3654 sprintf(ep, 3655 "Corrupt regions follow - unprintable chars are represented as '.'\n"); 3656 ep += 3657 sprintf(ep, 3658 "-----------------------------------------------------------------\n"); 3659 3660 pattern_index = patshift % pattern_length;; 3661 cp = buf; 3662 bufend = buf + length; 3663 3664 while (cp < bufend) { 3665 if (*cp != pattern[pattern_index]) { 3666 nb = bufend - cp; 3667 if ((unsigned int)nb > sizeof(expected) - 1) { 3668 nb = sizeof(expected) - 1; 3669 } 3670 3671 ep += 3672 sprintf(ep, 3673 "corrupt bytes starting at file offset %d\n", 3674 offset + (int)(cp - buf)); 3675 3676 /* 3677 * Fill in the expected and actual patterns 3678 */ 3679 memset(expected, 0x00, sizeof(expected)); 3680 memset(actual, 0x00, sizeof(actual)); 3681 3682 for (i = 0; i < nb; i++) { 3683 expected[i] = 3684 pattern[(pattern_index + 3685 i) % pattern_length]; 3686 if (!isprint(expected[i])) { 3687 expected[i] = '.'; 3688 } 3689 3690 actual[i] = cp[i]; 3691 if (!isprint(actual[i])) { 3692 actual[i] = '.'; 3693 } 3694 } 3695 3696 ep += 3697 sprintf(ep, 3698 " 1st %2d expected bytes: %s\n", 3699 nb, expected); 3700 ep += 3701 sprintf(ep, 3702 " 1st %2d actual bytes: %s\n", 3703 nb, actual); 3704 fflush(stderr); 3705 return errbuf; 3706 } else { 3707 cp++; 3708 pattern_index++; 3709 3710 if (pattern_index == pattern_length) { 3711 pattern_index = 0; 3712 } 3713 } 3714 } 3715 
return errbuf; 3716 } 3717 3718 return NULL; 3719 } 3720 3721 /* 3722 * Check the contents of a file beginning at offset, for length bytes. It 3723 * is assumed that there is a string of pattern bytes in this area of the 3724 * file. Use normal buffered reads to do the verification. 3725 * 3726 * If there is a data mismatch, write a detailed message into a static buffer 3727 * suitable for the caller to print. Otherwise print NULL. 3728 * 3729 * The fsa flag is set to non-zero if the buffer should be read back through 3730 * the FSA (unicos/mk). This implies the file will be opened 3731 * O_PARALLEL|O_RAW|O_WELLFORMED to do the validation. We must do this because 3732 * FSA will not allow the file to be opened for buffered io if it was 3733 * previously opened for O_PARALLEL io. 3734 */ 3735 3736 char *check_file(char *file, int offset, int length, char *pattern, 3737 int pattern_length, int patshift, int fsa) 3738 { 3739 static char errbuf[4096]; 3740 int fd, nb, flags; 3741 char *buf, *em, *ep; 3742 #ifdef sgi 3743 struct fd_cache *fdc; 3744 #endif 3745 3746 buf = Memptr; 3747 3748 if (V_opt) { 3749 flags = Validation_Flags | O_RDONLY; 3750 } else { 3751 flags = O_RDONLY; 3752 if (fsa) { 3753 #ifdef CRAY 3754 flags |= O_PARALLEL | O_RAW | O_WELLFORMED; 3755 #endif 3756 } 3757 } 3758 3759 if ((fd = alloc_fd(file, flags)) == -1) { 3760 sprintf(errbuf, 3761 "Could not open file %s with flags %#o (%s) for data comparison: %s (%d)\n", 3762 file, flags, format_oflags(flags), SYSERR, errno); 3763 return errbuf; 3764 } 3765 3766 if (lseek(fd, offset, SEEK_SET) == -1) { 3767 sprintf(errbuf, 3768 "Could not lseek to offset %d in %s for verification: %s (%d)\n", 3769 offset, file, SYSERR, errno); 3770 return errbuf; 3771 } 3772 #ifdef sgi 3773 /* Irix: Guarantee a properly aligned address on Direct I/O */ 3774 fdc = alloc_fdcache(file, flags); 3775 if ((flags & O_DIRECT) && ((long)buf % fdc->c_memalign != 0)) { 3776 buf += fdc->c_memalign - ((long)buf % fdc->c_memalign); 
3777 } 3778 #endif 3779 3780 if ((nb = read(fd, buf, length)) == -1) { 3781 #ifdef sgi 3782 sprintf(errbuf, 3783 "Could not read %d bytes from %s for verification: %s (%d)\n\tread(%d, 0x%lx, %d)\n\tbuf %% alignment(%d) = %ld\n", 3784 length, file, SYSERR, errno, 3785 fd, buf, length, 3786 fdc->c_memalign, (long)buf % fdc->c_memalign); 3787 #else 3788 sprintf(errbuf, 3789 "Could not read %d bytes from %s for verification: %s (%d)\n", 3790 length, file, SYSERR, errno); 3791 3792 #endif 3793 return errbuf; 3794 } 3795 3796 if (nb != length) { 3797 sprintf(errbuf, 3798 "Read wrong # bytes from %s. Expected %d, got %d\n", 3799 file, length, nb); 3800 return errbuf; 3801 } 3802 3803 if ((em = 3804 (*Data_Check) (buf, offset, length, pattern, pattern_length, 3805 patshift)) != NULL) { 3806 ep = errbuf; 3807 ep += sprintf(ep, "*** DATA COMPARISON ERROR ***\n"); 3808 ep += 3809 sprintf(ep, "check_file(%s, %d, %d, %s, %d, %d) failed\n\n", 3810 file, offset, length, pattern, pattern_length, 3811 patshift); 3812 ep += 3813 sprintf(ep, "Comparison fd is %d, with open flags %#o\n", 3814 fd, flags); 3815 strcpy(ep, em); 3816 return (errbuf); 3817 } 3818 return NULL; 3819 } 3820 3821 /* 3822 * Function to single-thread stdio output. 3823 */ 3824 3825 int doio_fprintf(FILE * stream, char *format, ...) 
{
	static int pid = -1;	/* cached getpid() result */
	char *date;
	int rval;
	struct flock flk;
	va_list arglist;
	struct timeval ts;
	gettimeofday(&ts, NULL);
	date = hms(ts.tv_sec);

	if (pid == -1) {
		pid = getpid();
	}

	/*
	 * Take a write lock on the stream's descriptor, waiting for it if
	 * necessary (F_SETLKW), and hold it until the message is flushed.
	 */
	flk.l_whence = flk.l_start = flk.l_len = 0;
	flk.l_type = F_WRLCK;
	fcntl(fileno(stream), F_SETLKW, &flk);

	/*
	 * Every message gets a header line (program, tag, pid, time) and a
	 * separator.  Note that rval only counts the bytes of those two
	 * header writes, not the caller's formatted text.
	 */
	va_start(arglist, format);
	rval = fprintf(stream, "\n%s%s (%5d) %s\n", Prog, TagName, pid, date);
	rval += fprintf(stream, "---------------------\n");
	vfprintf(stream, format, arglist);
	va_end(arglist);

	/* flush before the lock is dropped */
	fflush(stream);

	flk.l_type = F_UNLCK;
	fcntl(fileno(stream), F_SETLKW, &flk);

	return rval;
}

/*
 * Simple function for allocating core memory. Uses Memsize and Memptr to
 * keep track of the current amount allocated.
 */
#ifndef CRAY
int alloc_mem(int nbytes)
{
	char *cp;
	void *addr;
	int me = 0, flags, key, shmid;
	static int mturn = 0;	/* which memory type to use */
	struct memalloc *M;
	char filename[255];
#ifdef __linux__
	struct shmid_ds shm_ds;
#endif

#ifdef __linux__
	memset(&shm_ds, 0x00, sizeof(struct shmid_ds));
#endif

	/* nbytes = -1 means "free all allocated memory" */
	if (nbytes == -1) {

		/* walk every configured area; ones with no space are skipped */
		for (me = 0; me < Nmemalloc; me++) {
			if (Memalloc[me].space == NULL)
				continue;

			switch (Memalloc[me].memtype) {
			case MEM_DATA:
#ifdef sgi
				if (Memalloc[me].flags & MEMF_MPIN)
					munpin(Memalloc[me].space,
					       Memalloc[me].size);
#endif
				free(Memalloc[me].space);
				Memalloc[me].space = NULL;
				/* the global buffer bookkeeping is reset too */
				Memptr = NULL;
				Memsize = 0;
				break;
			case MEM_SHMEM:
#ifdef sgi
				if (Memalloc[me].flags & MEMF_MPIN)
					munpin(Memalloc[me].space,
					       Memalloc[me].size);
#endif
				shmdt(Memalloc[me].space);
				Memalloc[me].space = NULL;
#ifdef sgi
shmctl(Memalloc[me].fd, IPC_RMID); 3908 #else 3909 shmctl(Memalloc[me].fd, IPC_RMID, &shm_ds); 3910 #endif 3911 break; 3912 case MEM_MMAP: 3913 #ifdef sgi 3914 if (Memalloc[me].flags & MEMF_MPIN) 3915 munpin(Memalloc[me].space, 3916 Memalloc[me].size); 3917 #endif 3918 munmap(Memalloc[me].space, Memalloc[me].size); 3919 close(Memalloc[me].fd); 3920 if (Memalloc[me].flags & MEMF_FILE) { 3921 unlink(Memalloc[me].name); 3922 } 3923 Memalloc[me].space = NULL; 3924 break; 3925 default: 3926 doio_fprintf(stderr, 3927 "alloc_mem: HELP! Unknown memory space type %d index %d\n", 3928 Memalloc[me].memtype, me); 3929 break; 3930 } 3931 } 3932 return 0; 3933 } 3934 3935 /* 3936 * Select a memory area (currently round-robbin) 3937 */ 3938 3939 if (mturn >= Nmemalloc) 3940 mturn = 0; 3941 3942 M = &Memalloc[mturn]; 3943 3944 switch (M->memtype) { 3945 case MEM_DATA: 3946 if (nbytes > M->size) { 3947 if (M->space != NULL) { 3948 #ifdef sgi 3949 if (M->flags & MEMF_MPIN) 3950 munpin(M->space, M->size); 3951 #endif 3952 free(M->space); 3953 } 3954 M->space = NULL; 3955 M->size = 0; 3956 } 3957 3958 if (M->space == NULL) { 3959 if ((cp = malloc(nbytes)) == NULL) { 3960 doio_fprintf(stderr, 3961 "malloc(%d) failed: %s (%d)\n", 3962 nbytes, SYSERR, errno); 3963 return -1; 3964 } 3965 #ifdef sgi 3966 if (M->flags & MEMF_MPIN) { 3967 if (mpin(cp, nbytes) == -1) { 3968 doio_fprintf(stderr, 3969 "mpin(0x%lx, %d) failed: %s (%d)\n", 3970 cp, nbytes, SYSERR, errno); 3971 } 3972 } 3973 #endif 3974 M->space = (void *)cp; 3975 M->size = nbytes; 3976 } 3977 break; 3978 3979 case MEM_MMAP: 3980 if (nbytes > M->size) { 3981 if (M->space != NULL) { 3982 #ifdef sgi 3983 if (M->flags & MEMF_MPIN) 3984 munpin(M->space, M->size); 3985 #endif 3986 munmap(M->space, M->size); 3987 close(M->fd); 3988 if (M->flags & MEMF_FILE) 3989 unlink(M->name); 3990 } 3991 M->space = NULL; 3992 M->size = 0; 3993 } 3994 3995 if (M->space == NULL) { 3996 if (strchr(M->name, '%')) { 3997 sprintf(filename, M->name, 
getpid()); 3998 M->name = strdup(filename); 3999 } 4000 4001 if ((M->fd = 4002 open(M->name, O_CREAT | O_RDWR, 0666)) == -1) { 4003 doio_fprintf(stderr, 4004 "alloc_mmap: error %d (%s) opening '%s'\n", 4005 errno, SYSERR, M->name); 4006 return (-1); 4007 } 4008 4009 addr = NULL; 4010 flags = 0; 4011 M->size = nbytes * 4; 4012 4013 /* bias addr if MEMF_ADDR | MEMF_FIXADDR */ 4014 /* >>> how to pick a memory address? */ 4015 4016 /* bias flags on MEMF_PRIVATE etc */ 4017 if (M->flags & MEMF_PRIVATE) 4018 flags |= MAP_PRIVATE; 4019 #ifdef sgi 4020 if (M->flags & MEMF_LOCAL) 4021 flags |= MAP_LOCAL; 4022 if (M->flags & MEMF_AUTORESRV) 4023 flags |= MAP_AUTORESRV; 4024 if (M->flags & MEMF_AUTOGROW) 4025 flags |= MAP_AUTOGROW; 4026 #endif 4027 if (M->flags & MEMF_SHARED) 4028 flags |= MAP_SHARED; 4029 4030 /*printf("alloc_mem, about to mmap, fd=%d, name=(%s)\n", M->fd, M->name);*/ 4031 if ((M->space = mmap(addr, M->size, 4032 PROT_READ | PROT_WRITE, 4033 flags, M->fd, 0)) 4034 == MAP_FAILED) { 4035 doio_fprintf(stderr, 4036 "alloc_mem: mmap error. errno %d (%s)\n\tmmap(addr 0x%x, size %d, read|write 0x%x, mmap flags 0x%x [%#o], fd %d, 0)\n\tfile %s\n", 4037 errno, SYSERR, addr, M->size, 4038 PROT_READ | PROT_WRITE, flags, 4039 M->flags, M->fd, M->name); 4040 doio_fprintf(stderr, "\t%s%s%s%s%s", 4041 (flags & MAP_PRIVATE) ? "private " 4042 : "", 4043 #ifdef sgi 4044 (flags & MAP_LOCAL) ? "local " : 4045 "", 4046 (flags & MAP_AUTORESRV) ? 4047 "autoresrv " : "", 4048 (flags & MAP_AUTOGROW) ? 4049 "autogrow " : "", 4050 #endif 4051 (flags & MAP_SHARED) ? 
"shared" : 4052 ""); 4053 return (-1); 4054 } 4055 } 4056 break; 4057 4058 case MEM_SHMEM: 4059 if (nbytes > M->size) { 4060 if (M->space != NULL) { 4061 #ifdef sgi 4062 if (M->flags & MEMF_MPIN) 4063 munpin(M->space, M->size); 4064 #endif 4065 shmdt(M->space); 4066 #ifdef sgi 4067 shmctl(M->fd, IPC_RMID); 4068 #else 4069 shmctl(M->fd, IPC_RMID, &shm_ds); 4070 #endif 4071 } 4072 M->space = NULL; 4073 M->size = 0; 4074 } 4075 4076 if (M->space == NULL) { 4077 if (!strcmp(M->name, "private")) { 4078 key = IPC_PRIVATE; 4079 } else { 4080 sscanf(M->name, "%i", &key); 4081 } 4082 4083 M->size = M->nblks ? M->nblks * 512 : nbytes; 4084 4085 if (nbytes > M->size) { 4086 #ifdef DEBUG 4087 doio_fprintf(stderr, 4088 "MEM_SHMEM: nblks(%d) too small: nbytes=%d Msize=%d, skipping this req.\n", 4089 M->nblks, nbytes, M->size); 4090 #endif 4091 return SKIP_REQ; 4092 } 4093 4094 shmid = shmget(key, M->size, IPC_CREAT | 0666); 4095 if (shmid == -1) { 4096 doio_fprintf(stderr, 4097 "shmget(0x%x, %d, CREAT) failed: %s (%d)\n", 4098 key, M->size, SYSERR, errno); 4099 return (-1); 4100 } 4101 M->fd = shmid; 4102 M->space = shmat(shmid, NULL, SHM_RND); 4103 if (M->space == (void *)-1) { 4104 doio_fprintf(stderr, 4105 "shmat(0x%x, NULL, SHM_RND) failed: %s (%d)\n", 4106 shmid, SYSERR, errno); 4107 return (-1); 4108 } 4109 #ifdef sgi 4110 if (M->flags & MEMF_MPIN) { 4111 if (mpin(M->space, M->size) == -1) { 4112 doio_fprintf(stderr, 4113 "mpin(0x%lx, %d) failed: %s (%d)\n", 4114 M->space, M->size, SYSERR, 4115 errno); 4116 } 4117 } 4118 #endif 4119 } 4120 break; 4121 4122 default: 4123 doio_fprintf(stderr, 4124 "alloc_mem: HELP! 
Unknown memory space type %d index %d\n", 4125 Memalloc[me].memtype, mturn); 4126 break; 4127 } 4128 4129 Memptr = M->space; 4130 Memsize = M->size; 4131 4132 mturn++; 4133 return 0; 4134 } 4135 #else /* CRAY */ 4136 int alloc_mem(int nbytes) 4137 { 4138 char *cp; 4139 int ip; 4140 static char *malloc_space; 4141 4142 /* 4143 * The "unicos" version of this did some stuff with sbrk; 4144 * this caused problems with async I/O on irix, and now appears 4145 * to be causing problems with FSA I/O on unicos/mk. 4146 */ 4147 #ifdef NOTDEF 4148 if (nbytes > Memsize) { 4149 if ((cp = (char *)sbrk(nbytes - Memsize)) == (char *)-1) { 4150 doio_fprintf(stderr, "sbrk(%d) failed: %s (%d)\n", 4151 nbytes - Memsize, SYSERR, errno); 4152 return -1; 4153 } 4154 4155 if (Memsize == 0) 4156 Memptr = cp; 4157 Memsize += nbytes - Memsize; 4158 } 4159 #else 4160 4161 /* nbytes = -1 means "free all allocated memory" */ 4162 if (nbytes == -1) { 4163 free(malloc_space); 4164 Memptr = NULL; 4165 Memsize = 0; 4166 return 0; 4167 } 4168 4169 if (nbytes > Memsize) { 4170 if (Memsize != 0) 4171 free(malloc_space); 4172 4173 if ((cp = malloc_space = malloc(nbytes)) == NULL) { 4174 doio_fprintf(stderr, "malloc(%d) failed: %s (%d)\n", 4175 nbytes, SYSERR, errno); 4176 return -1; 4177 } 4178 #ifdef _CRAYT3E 4179 /* T3E requires memory to be aligned on 0x40 word boundaries */ 4180 ip = (int)cp; 4181 if (ip & 0x3F != 0) { 4182 doio_fprintf(stderr, 4183 "malloc(%d) = 0x%x(0x%x) not aligned by 0x%x\n", 4184 nbytes, cp, ip, ip & 0x3f); 4185 4186 free(cp); 4187 if ((cp = malloc_space = malloc(nbytes + 0x40)) == NULL) { 4188 doio_fprintf(stderr, 4189 "malloc(%d) failed: %s (%d)\n", 4190 nbytes, SYSERR, errno); 4191 return -1; 4192 } 4193 ip = (int)cp; 4194 cp += (0x40 - (ip & 0x3F)); 4195 } 4196 #endif /* _CRAYT3E */ 4197 Memptr = cp; 4198 Memsize = nbytes; 4199 } 4200 #endif /* NOTDEF */ 4201 return 0; 4202 } 4203 #endif /* CRAY */ 4204 4205 /* 4206 * Simple function for allocating sds space. 
Uses Sdssize and Sdsptr to 4207 * keep track of location and size of currently allocated chunk. 4208 */ 4209 4210 #ifdef _CRAY1 4211 4212 int alloc_sds(int nbytes) 4213 { 4214 int nblks; 4215 4216 if (nbytes > Sdssize) { 4217 if ((nblks = ssbreak(btoc(nbytes - Sdssize))) == -1) { 4218 doio_fprintf(stderr, "ssbreak(%d) failed: %s (%d)\n", 4219 btoc(nbytes - Sdssize), SYSERR, errno); 4220 return -1; 4221 } 4222 4223 Sdssize = ctob(nblks); 4224 Sdsptr = 0; 4225 } 4226 4227 return 0; 4228 } 4229 4230 #else 4231 4232 #ifdef CRAY 4233 4234 int alloc_sds(int nbytes) 4235 { 4236 doio_fprintf(stderr, 4237 "Internal Error - alloc_sds() called on a CRAY2 system\n"); 4238 alloc_mem(-1); 4239 exit(E_INTERNAL); 4240 } 4241 4242 #endif 4243 4244 #endif /* _CRAY1 */ 4245 4246 /* 4247 * Function to maintain a file descriptor cache, so that doio does not have 4248 * to do so many open() and close() calls. Descriptors are stored in the 4249 * cache by file name, and open flags. Each entry also has a _rtc value 4250 * associated with it which is used in aging. If doio cannot open a file 4251 * because it already has too many open (ie. system limit hit) it will close 4252 * the one in the cache that has the oldest _rtc value. 4253 * 4254 * If alloc_fd() is called with a file of NULL, it will close all descriptors 4255 * in the cache, and free the memory in the cache. 4256 */ 4257 4258 int alloc_fd(char *file, int oflags) 4259 { 4260 struct fd_cache *fdc; 4261 struct fd_cache *alloc_fdcache(char *file, int oflags); 4262 4263 fdc = alloc_fdcache(file, oflags); 4264 if (fdc != NULL) 4265 return (fdc->c_fd); 4266 else 4267 return (-1); 4268 } 4269 4270 struct fd_cache *alloc_fdcache(char *file, int oflags) 4271 { 4272 int fd; 4273 struct fd_cache *free_slot, *oldest_slot, *cp; 4274 static int cache_size = 0; 4275 static struct fd_cache *cache = NULL; 4276 #ifdef sgi 4277 struct dioattr finfo; 4278 #endif 4279 4280 /* 4281 * If file is NULL, it means to free up the fd cache. 
4282 */ 4283 4284 if (file == NULL && cache != NULL) { 4285 for (cp = cache; cp < &cache[cache_size]; cp++) { 4286 if (cp->c_fd != -1) { 4287 close(cp->c_fd); 4288 } 4289 #ifndef CRAY 4290 if (cp->c_memaddr != NULL) { 4291 munmap(cp->c_memaddr, cp->c_memlen); 4292 } 4293 #endif 4294 } 4295 4296 free(cache); 4297 cache = NULL; 4298 cache_size = 0; 4299 return 0; 4300 } 4301 4302 free_slot = NULL; 4303 oldest_slot = NULL; 4304 4305 /* 4306 * Look for a fd in the cache. If one is found, return it directly. 4307 * Otherwise, when this loop exits, oldest_slot will point to the 4308 * oldest fd slot in the cache, and free_slot will point to an 4309 * unoccupied slot if there are any. 4310 */ 4311 4312 for (cp = cache; cp != NULL && cp < &cache[cache_size]; cp++) { 4313 if (cp->c_fd != -1 && 4314 cp->c_oflags == oflags && strcmp(cp->c_file, file) == 0) { 4315 #ifdef CRAY 4316 cp->c_rtc = _rtc(); 4317 #else 4318 cp->c_rtc = Reqno; 4319 #endif 4320 return cp; 4321 } 4322 4323 if (cp->c_fd == -1) { 4324 if (free_slot == NULL) { 4325 free_slot = cp; 4326 } 4327 } else { 4328 if (oldest_slot == NULL || 4329 cp->c_rtc < oldest_slot->c_rtc) { 4330 oldest_slot = cp; 4331 } 4332 } 4333 } 4334 4335 /* 4336 * No matching file/oflags pair was found in the cache. Attempt to 4337 * open a new fd. 4338 */ 4339 4340 if ((fd = open(file, oflags, 0666)) < 0) { 4341 if (errno != EMFILE) { 4342 doio_fprintf(stderr, 4343 "Could not open file %s with flags %#o (%s): %s (%d)\n", 4344 file, oflags, format_oflags(oflags), 4345 SYSERR, errno); 4346 alloc_mem(-1); 4347 exit(E_SETUP); 4348 } 4349 4350 /* 4351 * If we get here, we have as many open fd's as we can have. 4352 * Close the oldest one in the cache (pointed to by 4353 * oldest_slot), and attempt to re-open. 
4354 */ 4355 4356 close(oldest_slot->c_fd); 4357 oldest_slot->c_fd = -1; 4358 free_slot = oldest_slot; 4359 4360 if ((fd = open(file, oflags, 0666)) < 0) { 4361 doio_fprintf(stderr, 4362 "Could not open file %s with flags %#o (%s): %s (%d)\n", 4363 file, oflags, format_oflags(oflags), 4364 SYSERR, errno); 4365 alloc_mem(-1); 4366 exit(E_SETUP); 4367 } 4368 } 4369 4370 /*printf("alloc_fd: new file %s flags %#o fd %d\n", file, oflags, fd);*/ 4371 4372 /* 4373 * If we get here, fd is our open descriptor. If free_slot is NULL, 4374 * we need to grow the cache, otherwise free_slot is the slot that 4375 * should hold the fd info. 4376 */ 4377 4378 if (free_slot == NULL) { 4379 cache = 4380 (struct fd_cache *)realloc(cache, 4381 sizeof(struct fd_cache) * 4382 (FD_ALLOC_INCR + cache_size)); 4383 if (cache == NULL) { 4384 doio_fprintf(stderr, 4385 "Could not malloc() space for fd chace"); 4386 alloc_mem(-1); 4387 exit(E_SETUP); 4388 } 4389 4390 cache_size += FD_ALLOC_INCR; 4391 4392 for (cp = &cache[cache_size - FD_ALLOC_INCR]; 4393 cp < &cache[cache_size]; cp++) { 4394 cp->c_fd = -1; 4395 } 4396 4397 free_slot = &cache[cache_size - FD_ALLOC_INCR]; 4398 } 4399 4400 /* 4401 * finally, fill in the cache slot info 4402 */ 4403 4404 free_slot->c_fd = fd; 4405 free_slot->c_oflags = oflags; 4406 strcpy(free_slot->c_file, file); 4407 #ifdef CRAY 4408 free_slot->c_rtc = _rtc(); 4409 #else 4410 free_slot->c_rtc = Reqno; 4411 #endif 4412 4413 #ifdef sgi 4414 if (oflags & O_DIRECT) { 4415 if (fcntl(fd, F_DIOINFO, &finfo) == -1) { 4416 finfo.d_mem = 1; 4417 finfo.d_miniosz = 1; 4418 finfo.d_maxiosz = 1; 4419 } 4420 } else { 4421 finfo.d_mem = 1; 4422 finfo.d_miniosz = 1; 4423 finfo.d_maxiosz = 1; 4424 } 4425 4426 free_slot->c_memalign = finfo.d_mem; 4427 free_slot->c_miniosz = finfo.d_miniosz; 4428 free_slot->c_maxiosz = finfo.d_maxiosz; 4429 #endif /* sgi */ 4430 #ifndef CRAY 4431 free_slot->c_memaddr = NULL; 4432 free_slot->c_memlen = 0; 4433 #endif 4434 4435 return free_slot; 4436 } 
4437 4438 /* 4439 * 4440 * Signal Handling Section 4441 * 4442 * 4443 */ 4444 4445 #ifdef sgi 4446 /* 4447 * "caller-id" for signals 4448 */ 4449 void signal_info(int sig, siginfo_t * info, void *v) 4450 { 4451 int haveit = 0; 4452 4453 if (info != NULL) { 4454 switch (info->si_code) { 4455 case SI_USER: 4456 doio_fprintf(stderr, 4457 "signal_info: si_signo %d si_errno %d si_code SI_USER pid %d uid %d\n", 4458 info->si_signo, info->si_errno, 4459 info->si_pid, info->si_uid); 4460 haveit = 1; 4461 break; 4462 4463 case SI_QUEUE: 4464 doio_fprintf(stderr, 4465 "signal_info si_signo %d si_code = SI_QUEUE\n", 4466 info->si_signo); 4467 haveit = 1; 4468 break; 4469 } 4470 4471 if (!haveit) { 4472 if ((info->si_signo == SIGSEGV) || 4473 (info->si_signo == SIGBUS)) { 4474 doio_fprintf(stderr, 4475 "signal_info si_signo %d si_errno %d si_code = %d si_addr=%p active_mmap_rw=%d havesigint=%d\n", 4476 info->si_signo, info->si_errno, 4477 info->si_code, info->si_addr, 4478 active_mmap_rw, havesigint); 4479 haveit = 1; 4480 } 4481 } 4482 4483 if (!haveit) { 4484 doio_fprintf(stderr, 4485 "signal_info: si_signo %d si_errno %d unknown code %d\n", 4486 info->si_signo, info->si_errno, 4487 info->si_code); 4488 } 4489 } else { 4490 doio_fprintf(stderr, "signal_info: sig %d\n", sig); 4491 } 4492 } 4493 4494 void cleanup_handler(int sig, siginfo_t * info, void *v) 4495 { 4496 havesigint = 1; /* in case there's a followup signal */ 4497 /*signal_info(sig, info, v); *//* be quiet on "normal" kill */ 4498 alloc_mem(-1); 4499 exit(0); 4500 } 4501 4502 void die_handler(int sig, siginfo_t * info, void *v) 4503 { 4504 doio_fprintf(stderr, "terminating on signal %d\n", sig); 4505 signal_info(sig, info, v); 4506 alloc_mem(-1); 4507 exit(1); 4508 } 4509 4510 void sigbus_handler(int sig, siginfo_t * info, void *v) 4511 { 4512 /* While we are doing a memcpy to/from an mmapped region we can 4513 get a SIGBUS for a variety of reasons--and not all of them 4514 should be considered failures. 
4515 4516 Under normal conditions if we get a SIGINT it means we've been 4517 told to shutdown. However, if we're currently doing the above- 4518 mentioned memcopy then the kernel will follow that SIGINT with 4519 a SIGBUS. We can guess that we're in this situation by seeing 4520 that the si_errno field in the siginfo structure has EINTR as 4521 an errno. (We might make the guess stronger by looking at the 4522 si_addr field to see that it's not faulting off the end of the 4523 mmapped region, but it seems that in such a case havesigint 4524 would not have been set so maybe that doesn't make the guess 4525 stronger.) 4526 */ 4527 4528 if (active_mmap_rw && havesigint && (info->si_errno == EINTR)) { 4529 cleanup_handler(sig, info, v); 4530 } else { 4531 die_handler(sig, info, v); 4532 } 4533 } 4534 #else 4535 4536 void cleanup_handler(int sig) 4537 { 4538 havesigint = 1; /* in case there's a followup signal */ 4539 alloc_mem(-1); 4540 exit(0); 4541 } 4542 4543 void die_handler(int sig) 4544 { 4545 doio_fprintf(stderr, "terminating on signal %d\n", sig); 4546 alloc_mem(-1); 4547 exit(1); 4548 } 4549 4550 #ifndef CRAY 4551 void sigbus_handler(int sig) 4552 { 4553 /* See sigbus_handler() in the 'ifdef sgi' case for details. Here, 4554 we don't have the siginfo stuff so the guess is weaker but we'll 4555 do it anyway. 4556 */ 4557 4558 if (active_mmap_rw && havesigint) 4559 cleanup_handler(sig); 4560 else 4561 die_handler(sig); 4562 } 4563 #endif /* !CRAY */ 4564 #endif /* sgi */ 4565 4566 void noop_handler(int sig) 4567 { 4568 return; 4569 } 4570 4571 /* 4572 * SIGINT handler for the parent (original doio) process. It simply sends 4573 * a SIGINT to all of the doio children. Since they're all in the same 4574 * pgrp, this can be done with a single kill(). 
4575 */ 4576 4577 void sigint_handler(int sig) 4578 { 4579 int i; 4580 4581 for (i = 0; i < Nchildren; i++) { 4582 if (Children[i] != -1) { 4583 kill(Children[i], SIGINT); 4584 } 4585 } 4586 } 4587 4588 /* 4589 * Signal handler used to inform a process when async io completes. Referenced 4590 * in do_read() and do_write(). Note that the signal handler is not 4591 * re-registered. 4592 */ 4593 4594 void aio_handler(int sig) 4595 { 4596 unsigned int i; 4597 struct aio_info *aiop; 4598 4599 for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) { 4600 aiop = &Aio_Info[i]; 4601 4602 if (aiop->strategy == A_SIGNAL && aiop->sig == sig) { 4603 aiop->signalled++; 4604 4605 if (aio_done(aiop)) { 4606 aiop->done++; 4607 } 4608 } 4609 } 4610 } 4611 4612 /* 4613 * dump info on all open aio slots 4614 */ 4615 void dump_aio(void) 4616 { 4617 unsigned int i, count; 4618 4619 count = 0; 4620 for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) { 4621 if (Aio_Info[i].busy) { 4622 count++; 4623 fprintf(stderr, 4624 "Aio_Info[%03d] id=%d fd=%d signal=%d signaled=%d\n", 4625 i, Aio_Info[i].id, 4626 Aio_Info[i].fd, 4627 Aio_Info[i].sig, Aio_Info[i].signalled); 4628 fprintf(stderr, "\tstrategy=%s\n", 4629 format_strat(Aio_Info[i].strategy)); 4630 } 4631 } 4632 fprintf(stderr, "%d active async i/os\n", count); 4633 } 4634 4635 #ifdef sgi 4636 /* 4637 * Signal handler called as a callback, not as a signal. 4638 * 'val' is the value from sigev_value and is assumed to be the 4639 * Aio_Info[] index. 
4640 */ 4641 void cb_handler(sigval_t val) 4642 { 4643 struct aio_info *aiop; 4644 4645 /*printf("cb_handler requesting slot %d\n", val.sival_int);*/ 4646 aiop = aio_slot(val.sival_int); 4647 /*printf("cb_handler, aiop=%p\n", aiop);*/ 4648 4649 /*printf("%d in cb_handler\n", getpid() );*/ 4650 if (aiop->strategy == A_CALLBACK) { 4651 aiop->signalled++; 4652 4653 if (aio_done(aiop)) { 4654 aiop->done++; 4655 } 4656 } 4657 } 4658 #endif 4659 4660 struct aio_info *aio_slot(int aio_id) 4661 { 4662 unsigned int i; 4663 static int id = 1; 4664 struct aio_info *aiop; 4665 4666 aiop = NULL; 4667 4668 for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) { 4669 if (aio_id == -1) { 4670 if (!Aio_Info[i].busy) { 4671 aiop = &Aio_Info[i]; 4672 aiop->busy = 1; 4673 aiop->id = id++; 4674 break; 4675 } 4676 } else { 4677 if (Aio_Info[i].busy && Aio_Info[i].id == aio_id) { 4678 aiop = &Aio_Info[i]; 4679 break; 4680 } 4681 } 4682 } 4683 4684 if (aiop == NULL) { 4685 doio_fprintf(stderr, "aio_slot(%d) not found. 
Request %d\n", 4686 aio_id, Reqno); 4687 dump_aio(); 4688 alloc_mem(-1); 4689 exit(E_INTERNAL); 4690 } 4691 4692 return aiop; 4693 } 4694 4695 int aio_register(int fd, int strategy, int sig) 4696 { 4697 struct aio_info *aiop; 4698 struct sigaction sa; 4699 4700 aiop = aio_slot(-1); 4701 4702 aiop->fd = fd; 4703 aiop->strategy = strategy; 4704 aiop->done = 0; 4705 #ifdef CRAY 4706 memset((char *)&aiop->iosw, 0x00, sizeof(aiop->iosw)); 4707 #endif 4708 4709 if (strategy == A_SIGNAL) { 4710 aiop->sig = sig; 4711 aiop->signalled = 0; 4712 4713 sa.sa_handler = aio_handler; 4714 sa.sa_flags = 0; 4715 sigemptyset(&sa.sa_mask); 4716 4717 sigaction(sig, &sa, &aiop->osa); 4718 } else { 4719 aiop->sig = -1; 4720 aiop->signalled = 0; 4721 } 4722 4723 return aiop->id; 4724 } 4725 4726 int aio_unregister(int aio_id) 4727 { 4728 struct aio_info *aiop; 4729 4730 aiop = aio_slot(aio_id); 4731 4732 if (aiop->strategy == A_SIGNAL) { 4733 sigaction(aiop->sig, &aiop->osa, NULL); 4734 } 4735 4736 aiop->busy = 0; 4737 return 0; 4738 } 4739 4740 #ifndef __linux__ 4741 int aio_wait(int aio_id) 4742 { 4743 #ifdef RECALL_SIZEOF 4744 long mask[RECALL_SIZEOF]; 4745 #endif 4746 sigset_t signalset; 4747 struct aio_info *aiop; 4748 #ifdef CRAY 4749 struct iosw *ioswlist[1]; 4750 #endif 4751 #ifdef sgi 4752 const aiocb_t *aioary[1]; 4753 #endif 4754 int r, cnt; 4755 4756 aiop = aio_slot(aio_id); 4757 /*printf("%d aiop B =%p\n", getpid(), aiop);*/ 4758 4759 switch (aiop->strategy) { 4760 case A_POLL: 4761 while (!aio_done(aiop)) ; 4762 break; 4763 4764 case A_SIGNAL: 4765 sigemptyset(&signalset); 4766 sighold(aiop->sig); 4767 4768 while (!aiop->signalled || !aiop->done) { 4769 sigsuspend(&signalset); 4770 sighold(aiop->sig); 4771 } 4772 break; 4773 4774 #ifdef CRAY 4775 case A_RECALL: 4776 ioswlist[0] = &aiop->iosw; 4777 if (recall(aiop->fd, 1, ioswlist) < 0) { 4778 doio_fprintf(stderr, "recall() failed: %s (%d)\n", 4779 SYSERR, errno); 4780 exit(E_SETUP); 4781 } 4782 break; 4783 4784 #ifdef 
RECALL_SIZEOF 4785 4786 case A_RECALLA: 4787 RECALL_INIT(mask); 4788 RECALL_SET(mask, aiop->fd); 4789 if (recalla(mask) < 0) { 4790 doio_fprintf(stderr, "recalla() failed: %s (%d)\n", 4791 SYSERR, errno); 4792 exit(E_SETUP); 4793 } 4794 4795 RECALL_CLR(mask, aiop->fd); 4796 break; 4797 #endif 4798 4799 case A_RECALLS: 4800 ioswlist[0] = &aiop->iosw; 4801 if (recalls(1, ioswlist) < 0) { 4802 doio_fprintf(stderr, "recalls failed: %s (%d)\n", 4803 SYSERR, errno); 4804 exit(E_SETUP); 4805 } 4806 break; 4807 #endif /* CRAY */ 4808 4809 #ifdef sgi 4810 case A_CALLBACK: 4811 aioary[0] = &aiop->aiocb; 4812 cnt = 0; 4813 do { 4814 r = aio_suspend(aioary, 1, NULL); 4815 if (r == -1) { 4816 doio_fprintf(stderr, 4817 "aio_suspend failed: %s (%d)\n", 4818 SYSERR, errno); 4819 exit(E_SETUP); 4820 } 4821 cnt++; 4822 } while (aiop->done == 0); 4823 4824 #if 0 4825 /* 4826 * after having this set for a while, I've decided that 4827 * it's too noisy 4828 */ 4829 if (cnt > 1) 4830 doio_fprintf(stderr, 4831 "aio_wait: callback wait took %d tries\n", 4832 cnt); 4833 #endif 4834 4835 /* 4836 * Note: cb_handler already calls aio_done 4837 */ 4838 break; 4839 4840 case A_SUSPEND: 4841 aioary[0] = &aiop->aiocb; 4842 r = aio_suspend(aioary, 1, NULL); 4843 if (r == -1) { 4844 doio_fprintf(stderr, "aio_suspend failed: %s (%d)\n", 4845 SYSERR, errno); 4846 exit(E_SETUP); 4847 } 4848 4849 aio_done(aiop); 4850 break; 4851 #endif 4852 } 4853 4854 /*printf("aio_wait: errno %d return %d\n", aiop->aio_errno, aiop->aio_ret);*/ 4855 4856 return 0; 4857 } 4858 #endif /* !linux */ 4859 4860 /* 4861 * Format specified time into HH:MM:SS format. t is the time to format 4862 * in seconds (as returned from time(2)). 
4863 */ 4864 4865 char *hms(time_t t) 4866 { 4867 static char ascii_time[9]; 4868 struct tm *ltime; 4869 4870 ltime = localtime(&t); 4871 strftime(ascii_time, sizeof(ascii_time), "%H:%M:%S", ltime); 4872 4873 return ascii_time; 4874 } 4875 4876 /* 4877 * Simple routine to check if an async io request has completed. 4878 */ 4879 4880 int aio_done(struct aio_info *ainfo) 4881 { 4882 #ifdef CRAY 4883 return ainfo->iosw.sw_flag; 4884 #endif 4885 4886 #ifdef sgi 4887 if ((ainfo->aio_errno = aio_error(&ainfo->aiocb)) == -1) { 4888 doio_fprintf(stderr, "aio_done: aio_error failed: %s (%d)\n", 4889 SYSERR, errno); 4890 exit(E_SETUP); 4891 } 4892 /*printf("%d aio_done aio_errno=%d\n", getpid(), ainfo->aio_errno); */ 4893 if (ainfo->aio_errno != EINPROGRESS) { 4894 if ((ainfo->aio_ret = aio_return(&ainfo->aiocb)) == -1) { 4895 doio_fprintf(stderr, 4896 "aio_done: aio_return failed: %s (%d)\n", 4897 SYSERR, errno); 4898 exit(E_SETUP); 4899 } 4900 } 4901 4902 return (ainfo->aio_errno != EINPROGRESS); 4903 #else 4904 return -1; /* invalid */ 4905 #endif 4906 } 4907 4908 /* 4909 * Routine to handle upanic() - it first attempts to set the panic flag. If 4910 * the flag cannot be set, an error message is issued. A call to upanic 4911 * with PA_PANIC is then done unconditionally, in case the panic flag was set 4912 * from outside the program (as with the panic(8) program). 4913 * 4914 * Note - we only execute the upanic code if -U was used, and the passed in 4915 * mask is set in the Upanic_Conditions bitmask. 
4916 */ 4917 4918 void doio_upanic(int mask) 4919 { 4920 if (U_opt == 0 || (mask & Upanic_Conditions) == 0) { 4921 return; 4922 } 4923 #ifdef CRAY 4924 if (upanic(PA_SET) < 0) { 4925 doio_fprintf(stderr, 4926 "WARNING - Could not set the panic flag - upanic(PA_SET) failed: %s (%d)\n", 4927 SYSERR, errno); 4928 } 4929 4930 upanic(PA_PANIC); 4931 #endif 4932 #ifdef sgi 4933 syssgi(1005); /* syssgi test panic - DEBUG kernels only */ 4934 #endif 4935 doio_fprintf(stderr, "WARNING - upanic() failed\n"); 4936 } 4937 4938 /* 4939 * Parse cmdline options/arguments and set appropriate global variables. 4940 * If the cmdline is valid, return 0 to caller. Otherwise exit with a status 4941 * of 1. 4942 */ 4943 4944 int parse_cmdline(int argc, char **argv, char *opts) 4945 { 4946 int c; 4947 char cc, *cp = NULL, *tok = NULL; 4948 extern int opterr; 4949 extern int optind; 4950 extern char *optarg; 4951 struct smap *s; 4952 char *memargs[NMEMALLOC]; 4953 int nmemargs, ma; 4954 4955 if (*argv[0] == '-') { 4956 argv[0]++; 4957 Execd = 1; 4958 } 4959 4960 if ((Prog = strrchr(argv[0], '/')) == NULL) { 4961 Prog = argv[0]; 4962 } else { 4963 Prog++; 4964 } 4965 4966 opterr = 0; 4967 while ((c = getopt(argc, argv, opts)) != EOF) { 4968 switch ((char)c) { 4969 case 'a': 4970 a_opt++; 4971 break; 4972 4973 case 'C': 4974 C_opt++; 4975 for (s = checkmap; s->string != NULL; s++) 4976 if (!strcmp(s->string, optarg)) 4977 break; 4978 if (s->string == NULL && tok != NULL) { 4979 fprintf(stderr, 4980 "%s%s: Illegal -C arg (%s). 
Must be one of: ", 4981 Prog, TagName, tok); 4982 4983 for (s = checkmap; s->string != NULL; s++) 4984 fprintf(stderr, "%s ", s->string); 4985 fprintf(stderr, "\n"); 4986 exit(1); 4987 } 4988 4989 switch (s->value) { 4990 case C_DEFAULT: 4991 Data_Fill = doio_pat_fill; 4992 Data_Check = doio_pat_check; 4993 break; 4994 default: 4995 fprintf(stderr, 4996 "%s%s: Unrecognised -C arg '%s' %d", 4997 Prog, TagName, s->string, s->value); 4998 exit(1); 4999 } 5000 break; 5001 5002 case 'd': /* delay between i/o ops */ 5003 parse_delay(optarg); 5004 break; 5005 5006 case 'e': 5007 if (Npes > 1 && Nprocs > 1) { 5008 fprintf(stderr, 5009 "%s%s: Warning - Program is a multi-pe application - exec option is ignored.\n", 5010 Prog, TagName); 5011 } 5012 e_opt++; 5013 break; 5014 5015 case 'h': 5016 help(stdout); 5017 exit(0); 5018 break; 5019 5020 case 'k': 5021 k_opt++; 5022 break; 5023 5024 case 'm': 5025 Message_Interval = strtol(optarg, &cp, 10); 5026 if (*cp != '\0' || Message_Interval < 0) { 5027 fprintf(stderr, 5028 "%s%s: Illegal -m arg (%s): Must be an integer >= 0\n", 5029 Prog, TagName, optarg); 5030 exit(1); 5031 } 5032 m_opt++; 5033 break; 5034 5035 case 'M': /* memory allocation types */ 5036 #ifndef CRAY 5037 nmemargs = string_to_tokens(optarg, memargs, 32, ","); 5038 for (ma = 0; ma < nmemargs; ma++) { 5039 parse_memalloc(memargs[ma]); 5040 } 5041 /*dump_memalloc(); */ 5042 #else 5043 fprintf(stderr, 5044 "%s%s: Error: -M isn't supported on this platform\n", 5045 Prog, TagName); 5046 exit(1); 5047 #endif 5048 M_opt++; 5049 break; 5050 5051 case 'N': 5052 sprintf(TagName, "(%.39s)", optarg); 5053 break; 5054 5055 case 'n': 5056 Nprocs = strtol(optarg, &cp, 10); 5057 if (*cp != '\0' || Nprocs < 1) { 5058 fprintf(stderr, 5059 "%s%s: Illegal -n arg (%s): Must be integer > 0\n", 5060 Prog, TagName, optarg); 5061 exit(E_USAGE); 5062 } 5063 5064 if (Npes > 1 && Nprocs > 1) { 5065 fprintf(stderr, 5066 "%s%s: Program has been built as a multi-pe app. 
-n1 is the only nprocs value allowed\n", 5067 Prog, TagName); 5068 exit(E_SETUP); 5069 } 5070 n_opt++; 5071 break; 5072 5073 case 'r': 5074 Release_Interval = strtol(optarg, &cp, 10); 5075 if (*cp != '\0' || Release_Interval < 0) { 5076 fprintf(stderr, 5077 "%s%s: Illegal -r arg (%s): Must be integer >= 0\n", 5078 Prog, TagName, optarg); 5079 exit(E_USAGE); 5080 } 5081 5082 r_opt++; 5083 break; 5084 5085 case 'w': 5086 Write_Log = optarg; 5087 w_opt++; 5088 break; 5089 5090 case 'v': 5091 v_opt++; 5092 break; 5093 5094 case 'V': 5095 if (strcasecmp(optarg, "sync") == 0) { 5096 Validation_Flags = O_SYNC; 5097 } else if (strcasecmp(optarg, "buffered") == 0) { 5098 Validation_Flags = 0; 5099 #ifdef CRAY 5100 } else if (strcasecmp(optarg, "parallel") == 0) { 5101 Validation_Flags = O_PARALLEL; 5102 } else if (strcasecmp(optarg, "ldraw") == 0) { 5103 Validation_Flags = O_LDRAW; 5104 } else if (strcasecmp(optarg, "raw") == 0) { 5105 Validation_Flags = O_RAW; 5106 #endif 5107 #ifdef sgi 5108 } else if (strcasecmp(optarg, "direct") == 0) { 5109 Validation_Flags = O_DIRECT; 5110 #endif 5111 } else { 5112 if (sscanf 5113 (optarg, "%i%c", &Validation_Flags, 5114 &cc) != 1) { 5115 fprintf(stderr, 5116 "%s: Invalid -V argument (%s) - must be a decimal, hex, or octal\n", 5117 Prog, optarg); 5118 fprintf(stderr, 5119 " number, or one of the following strings: 'sync',\n"); 5120 fprintf(stderr, 5121 " 'buffered', 'parallel', 'ldraw', or 'raw'\n"); 5122 exit(E_USAGE); 5123 } 5124 } 5125 V_opt++; 5126 break; 5127 case 'U': 5128 tok = strtok(optarg, ","); 5129 while (tok != NULL) { 5130 for (s = Upanic_Args; s->string != NULL; s++) 5131 if (strcmp(s->string, tok) == 0) 5132 break; 5133 5134 if (s->string == NULL) { 5135 fprintf(stderr, 5136 "%s%s: Illegal -U arg (%s). 
Must be one of: ", 5137 Prog, TagName, tok); 5138 5139 for (s = Upanic_Args; s->string != NULL; 5140 s++) 5141 fprintf(stderr, "%s ", 5142 s->string); 5143 5144 fprintf(stderr, "\n"); 5145 5146 exit(1); 5147 } 5148 5149 Upanic_Conditions |= s->value; 5150 tok = strtok(NULL, ","); 5151 } 5152 5153 U_opt++; 5154 break; 5155 5156 case '?': 5157 usage(stderr); 5158 exit(E_USAGE); 5159 break; 5160 } 5161 } 5162 5163 /* 5164 * Supply defaults 5165 */ 5166 5167 if (!C_opt) { 5168 Data_Fill = doio_pat_fill; 5169 Data_Check = doio_pat_check; 5170 } 5171 5172 if (!U_opt) 5173 Upanic_Conditions = 0; 5174 5175 if (!n_opt) 5176 Nprocs = 1; 5177 5178 if (!r_opt) 5179 Release_Interval = DEF_RELEASE_INTERVAL; 5180 5181 if (!M_opt) { 5182 Memalloc[Nmemalloc].memtype = MEM_DATA; 5183 Memalloc[Nmemalloc].flags = 0; 5184 Memalloc[Nmemalloc].name = NULL; 5185 Memalloc[Nmemalloc].space = NULL; 5186 Nmemalloc++; 5187 } 5188 5189 /* 5190 * Initialize input stream 5191 */ 5192 5193 if (argc == optind) { 5194 Infile = NULL; 5195 } else { 5196 Infile = argv[optind++]; 5197 } 5198 5199 if (argc != optind) { 5200 usage(stderr); 5201 exit(E_USAGE); 5202 } 5203 5204 return 0; 5205 } 5206 5207 /* 5208 * Parse memory allocation types 5209 * 5210 * Types are: 5211 * Data 5212 * T3E-shmem:blksize[:nblks] 5213 * SysV-shmem:shmid:blksize:nblks 5214 * if shmid is "private", use IPC_PRIVATE 5215 * and nblks is not required 5216 * 5217 * mmap:flags:filename:blksize[:nblks] 5218 * flags are one of: 5219 * p - private (MAP_PRIVATE) 5220 * a - private, MAP_AUTORESRV 5221 * l - local (MAP_LOCAL) 5222 * s - shared (nblks required) 5223 * 5224 * plus any of: 5225 * f - fixed address (MAP_FIXED) 5226 * A - use an address without MAP_FIXED 5227 * a - autogrow (map once at startup) 5228 * 5229 * mmap:flags:devzero 5230 * mmap /dev/zero (shared not allowd) 5231 * maps the first 4096 bytes of /dev/zero 5232 * 5233 * - put a directory at the beginning of the shared 5234 * regions saying what pid has what region. 
5235 * DIRMAGIC 5236 * BLKSIZE 5237 * NBLKS 5238 * nblks worth of directories - 1 int pids 5239 */ 5240 #ifndef CRAY 5241 void parse_memalloc(char *arg) 5242 { 5243 char *allocargs[NMEMALLOC]; 5244 int nalloc; 5245 struct memalloc *M; 5246 5247 if (Nmemalloc >= NMEMALLOC) { 5248 doio_fprintf(stderr, "Error - too many memory types (%d).\n", 5249 Nmemalloc); 5250 return; 5251 } 5252 5253 M = &Memalloc[Nmemalloc]; 5254 5255 nalloc = string_to_tokens(arg, allocargs, 32, ":"); 5256 if (!strcmp(allocargs[0], "data")) { 5257 M->memtype = MEM_DATA; 5258 M->flags = 0; 5259 M->name = NULL; 5260 M->space = NULL; 5261 Nmemalloc++; 5262 if (nalloc >= 2) { 5263 if (strchr(allocargs[1], 'p')) 5264 M->flags |= MEMF_MPIN; 5265 } 5266 } else if (!strcmp(allocargs[0], "mmap")) { 5267 /* mmap:flags:filename[:size] */ 5268 M->memtype = MEM_MMAP; 5269 M->flags = 0; 5270 M->space = NULL; 5271 if (nalloc >= 1) { 5272 if (strchr(allocargs[1], 'p')) 5273 M->flags |= MEMF_PRIVATE; 5274 if (strchr(allocargs[1], 'a')) 5275 M->flags |= MEMF_AUTORESRV; 5276 if (strchr(allocargs[1], 'l')) 5277 M->flags |= MEMF_LOCAL; 5278 if (strchr(allocargs[1], 's')) 5279 M->flags |= MEMF_SHARED; 5280 5281 if (strchr(allocargs[1], 'f')) 5282 M->flags |= MEMF_FIXADDR; 5283 if (strchr(allocargs[1], 'A')) 5284 M->flags |= MEMF_ADDR; 5285 if (strchr(allocargs[1], 'G')) 5286 M->flags |= MEMF_AUTOGROW; 5287 5288 if (strchr(allocargs[1], 'U')) 5289 M->flags |= MEMF_FILE; 5290 } else { 5291 M->flags |= MEMF_PRIVATE; 5292 } 5293 5294 if (nalloc > 2) { 5295 if (!strcmp(allocargs[2], "devzero")) { 5296 M->name = "/dev/zero"; 5297 if (M->flags & 5298 ((MEMF_PRIVATE | MEMF_LOCAL) == 0)) 5299 M->flags |= MEMF_PRIVATE; 5300 } else { 5301 M->name = allocargs[2]; 5302 } 5303 } else { 5304 M->name = "/dev/zero"; 5305 if (M->flags & ((MEMF_PRIVATE | MEMF_LOCAL) == 0)) 5306 M->flags |= MEMF_PRIVATE; 5307 } 5308 Nmemalloc++; 5309 5310 } else if (!strcmp(allocargs[0], "shmem")) { 5311 /* shmem:shmid:size */ 5312 M->memtype = 
MEM_SHMEM; 5313 M->flags = 0; 5314 M->space = NULL; 5315 if (nalloc >= 2) { 5316 M->name = allocargs[1]; 5317 } else { 5318 M->name = NULL; 5319 } 5320 if (nalloc >= 3) { 5321 sscanf(allocargs[2], "%i", &M->nblks); 5322 } else { 5323 M->nblks = 0; 5324 } 5325 if (nalloc >= 4) { 5326 if (strchr(allocargs[3], 'p')) 5327 M->flags |= MEMF_MPIN; 5328 } 5329 5330 Nmemalloc++; 5331 } else { 5332 doio_fprintf(stderr, "Error - unknown memory type '%s'.\n", 5333 allocargs[0]); 5334 exit(1); 5335 } 5336 } 5337 5338 void dump_memalloc(void) 5339 { 5340 int ma; 5341 char *mt; 5342 5343 if (Nmemalloc == 0) { 5344 printf("No memory allocation strategies devined\n"); 5345 return; 5346 } 5347 5348 for (ma = 0; ma < Nmemalloc; ma++) { 5349 switch (Memalloc[ma].memtype) { 5350 case MEM_DATA: 5351 mt = "data"; 5352 break; 5353 case MEM_SHMEM: 5354 mt = "shmem"; 5355 break; 5356 case MEM_MMAP: 5357 mt = "mmap"; 5358 break; 5359 default: 5360 mt = "unknown"; 5361 break; 5362 } 5363 printf("mstrat[%d] = %d %s\n", ma, Memalloc[ma].memtype, mt); 5364 printf("\tflags=%#o name='%s' nblks=%d\n", 5365 Memalloc[ma].flags, 5366 Memalloc[ma].name, Memalloc[ma].nblks); 5367 } 5368 } 5369 5370 #endif /* !CRAY */ 5371 5372 /* 5373 * -d <op>:<time> - doio inter-operation delay 5374 * currently this permits ONE type of delay between operations. 5375 */ 5376 5377 void parse_delay(char *arg) 5378 { 5379 char *delayargs[NMEMALLOC]; 5380 int ndelay; 5381 struct smap *s; 5382 5383 ndelay = string_to_tokens(arg, delayargs, 32, ":"); 5384 if (ndelay < 2) { 5385 doio_fprintf(stderr, 5386 "Illegal delay arg (%s). Must be operation:time\n", 5387 arg); 5388 exit(1); 5389 } 5390 for (s = delaymap; s->string != NULL; s++) 5391 if (!strcmp(s->string, delayargs[0])) 5392 break; 5393 if (s->string == NULL) { 5394 fprintf(stderr, 5395 "Illegal Delay arg (%s). 
Must be one of: ", arg); 5396 5397 for (s = delaymap; s->string != NULL; s++) 5398 fprintf(stderr, "%s ", s->string); 5399 fprintf(stderr, "\n"); 5400 exit(1); 5401 } 5402 5403 delayop = s->value; 5404 5405 sscanf(delayargs[1], "%i", &delaytime); 5406 5407 if (ndelay > 2) { 5408 fprintf(stderr, "Warning: extra delay arguments ignored.\n"); 5409 } 5410 } 5411 5412 /* 5413 * Usage clause - obvious 5414 */ 5415 5416 int usage(FILE * stream) 5417 { 5418 /* 5419 * Only do this if we are on vpe 0, to avoid seeing it from every 5420 * process in the application. 5421 */ 5422 5423 if (Npes > 1 && Vpe != 0) { 5424 return 0; 5425 } 5426 5427 fprintf(stream, 5428 "usage%s: %s [-aekv] [-m message_interval] [-n nprocs] [-r release_interval] [-w write_log] [-V validation_ftype] [-U upanic_cond] [infile]\n", 5429 TagName, Prog); 5430 return 0; 5431 } 5432 5433 void help(FILE * stream) 5434 { 5435 /* 5436 * Only the app running on vpe 0 gets to issue help - this prevents 5437 * everybody in the application from doing this. 5438 */ 5439 5440 if (Npes > 1 && Vpe != 0) { 5441 return; 5442 } 5443 5444 usage(stream); 5445 fprintf(stream, "\n"); 5446 fprintf(stream, 5447 "\t-a abort - kill all doio processes on data compare\n"); 5448 fprintf(stream, 5449 "\t errors. Normally only the erroring process exits\n"); 5450 fprintf(stream, "\t-C data-pattern-type \n"); 5451 fprintf(stream, 5452 "\t Available data patterns are:\n"); 5453 fprintf(stream, "\t default - repeating pattern\n"); 5454 fprintf(stream, "\t-d Operation:Time Inter-operation delay.\n"); 5455 fprintf(stream, "\t Operations are:\n"); 5456 fprintf(stream, 5457 "\t select:time (1 second=1000000)\n"); 5458 fprintf(stream, "\t sleep:time (1 second=1)\n"); 5459 #ifdef sgi 5460 fprintf(stream, 5461 "\t sginap:time (1 second=CLK_TCK=100)\n"); 5462 #endif 5463 fprintf(stream, "\t alarm:time (1 second=1)\n"); 5464 fprintf(stream, 5465 "\t-e Re-exec children before entering the main\n"); 5466 fprintf(stream, 5467 "\t loop. 
This is useful for spreading\n"); 5468 fprintf(stream, 5469 "\t procs around on multi-pe systems.\n"); 5470 fprintf(stream, 5471 "\t-k Lock file regions during writes using fcntl()\n"); 5472 fprintf(stream, 5473 "\t-v Verify writes - this is done by doing a buffered\n"); 5474 fprintf(stream, 5475 "\t read() of the data if file io was done, or\n"); 5476 fprintf(stream, 5477 "\t an ssread()of the data if sds io was done\n"); 5478 #ifndef CRAY 5479 fprintf(stream, 5480 "\t-M Data buffer allocation method\n"); 5481 fprintf(stream, "\t alloc-type[,type]\n"); 5482 #ifdef sgi 5483 fprintf(stream, "\t data:flags\n"); 5484 fprintf(stream, "\t p - mpin buffer\n"); 5485 fprintf(stream, "\t shmem:shmid:size:flags\n"); 5486 fprintf(stream, "\t p - mpin buffer\n"); 5487 #else 5488 fprintf(stream, "\t data\n"); 5489 fprintf(stream, "\t shmem:shmid:size\n"); 5490 #endif /* sgi */ 5491 fprintf(stream, "\t mmap:flags:filename\n"); 5492 fprintf(stream, "\t p - private\n"); 5493 #ifdef sgi 5494 fprintf(stream, "\t s - shared\n"); 5495 fprintf(stream, "\t l - local\n"); 5496 fprintf(stream, "\t a - autoresrv\n"); 5497 fprintf(stream, "\t G - autogrow\n"); 5498 #else 5499 fprintf(stream, 5500 "\t s - shared (shared file must exist\n"), 5501 fprintf(stream, 5502 "\t and have needed length)\n"); 5503 #endif 5504 fprintf(stream, 5505 "\t f - fixed address (not used)\n"); 5506 fprintf(stream, 5507 "\t a - specify address (not used)\n"); 5508 fprintf(stream, 5509 "\t U - Unlink file when done\n"); 5510 fprintf(stream, 5511 "\t The default flag is private\n"); 5512 fprintf(stream, "\n"); 5513 #endif /* !CRAY */ 5514 fprintf(stream, 5515 "\t-m message_interval Generate a message every 'message_interval'\n"); 5516 fprintf(stream, 5517 "\t requests. An interval of 0 suppresses\n"); 5518 fprintf(stream, 5519 "\t messages. 
The default is 0.\n"); 5520 fprintf(stream, "\t-N tagname Tag name, for Monster.\n"); 5521 fprintf(stream, "\t-n nprocs # of processes to start up\n"); 5522 fprintf(stream, 5523 "\t-r release_interval Release all memory and close\n"); 5524 fprintf(stream, 5525 "\t files every release_interval operations.\n"); 5526 fprintf(stream, 5527 "\t By default procs never release memory\n"); 5528 fprintf(stream, 5529 "\t or close fds unless they have to.\n"); 5530 fprintf(stream, 5531 "\t-V validation_ftype The type of file descriptor to use for doing data\n"); 5532 fprintf(stream, 5533 "\t validation. validation_ftype may be an octal,\n"); 5534 fprintf(stream, 5535 "\t hex, or decimal number representing the open()\n"); 5536 fprintf(stream, 5537 "\t flags, or may be one of the following strings:\n"); 5538 fprintf(stream, 5539 "\t 'buffered' - validate using bufferd read\n"); 5540 fprintf(stream, 5541 "\t 'sync' - validate using O_SYNC read\n"); 5542 #ifdef sgi 5543 fprintf(stream, 5544 "\t 'direct - validate using O_DIRECT read'\n"); 5545 #endif 5546 #ifdef CRAY 5547 fprintf(stream, 5548 "\t 'ldraw' - validate using O_LDRAW read\n"); 5549 fprintf(stream, 5550 "\t 'parallel' - validate using O_PARALLEL read\n"); 5551 fprintf(stream, 5552 "\t 'raw' - validate using O_RAW read\n"); 5553 #endif 5554 fprintf(stream, "\t By default, 'parallel'\n"); 5555 fprintf(stream, 5556 "\t is used if the write was done with O_PARALLEL\n"); 5557 fprintf(stream, 5558 "\t or 'buffered' for all other writes.\n"); 5559 fprintf(stream, 5560 "\t-w write_log File to log file writes to. 
The doio_check\n"); 5561 fprintf(stream, 5562 "\t program can reconstruct datafiles using the\n"); 5563 fprintf(stream, 5564 "\t write_log, and detect if a file is corrupt\n"); 5565 fprintf(stream, 5566 "\t after all procs have exited.\n"); 5567 fprintf(stream, 5568 "\t-U upanic_cond Comma separated list of conditions that will\n"); 5569 fprintf(stream, 5570 "\t cause a call to upanic(PA_PANIC).\n"); 5571 fprintf(stream, 5572 "\t 'corruption' -> upanic on bad data comparisons\n"); 5573 fprintf(stream, 5574 "\t 'iosw' ---> upanic on unexpected async iosw\n"); 5575 fprintf(stream, 5576 "\t 'rval' ---> upanic on unexpected syscall rvals\n"); 5577 fprintf(stream, 5578 "\t 'all' ---> all of the above\n"); 5579 fprintf(stream, "\n"); 5580 fprintf(stream, 5581 "\tinfile Input stream - default is stdin - must be a list\n"); 5582 fprintf(stream, 5583 "\t of io_req structures (see doio.h). Currently\n"); 5584 fprintf(stream, 5585 "\t only the iogen program generates the proper\n"); 5586 fprintf(stream, "\t format\n"); 5587 } 5588