Home | History | Annotate | Download | only in doio
      1 /*
      2  * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
      3  *
      4  * This program is free software; you can redistribute it and/or modify it
      5  * under the terms of version 2 of the GNU General Public License as
      6  * published by the Free Software Foundation.
      7  *
      8  * This program is distributed in the hope that it would be useful, but
      9  * WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     11  *
     12  * Further, this software is distributed without any warranty that it is
     13  * free of the rightful claim of any third person regarding infringement
     14  * or the like.  Any license provided herein, whether implied or
     15  * otherwise, applies only to this software file.  Patent licenses, if
     16  * any, provided herein do not apply to combinations of this program with
     17  * other software, or any other product whatsoever.
     18  *
     19  * You should have received a copy of the GNU General Public License along
     20  * with this program; if not, write the Free Software Foundation, Inc.,
     21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
     22  *
     23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
     24  * Mountain View, CA  94043, or:
     25  *
     26  * http://www.sgi.com
     27  *
     28  * For further information regarding this notice, see:
     29  *
     30  * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
     31  */
     32 /*
     33  * doio -	a general purpose io initiator with system call and
     34  *		write logging.  See doio.h for the structure which defines
     35  *		what doio requests should look like.
     36  *
     37  *		Currently doio can handle read,write,reada,writea,ssread,
     38  *		sswrite, and many varieties of listio requests.
     39  *		For disk io, if the O_SSD flag is set doio will allocate
     40  *		the appropriate amount of ssd and do the transfer - thus, doio
     41  *		can handle all of the primitive types of file io.
     42  *
     43  * programming
     44  * notes:
     45  * -----------
     46  *	messages should generally be printed using doio_fprintf().
     47  *
     48  */
     49 
     50 #include <stdio.h>
     51 #include <errno.h>
     52 #include <fcntl.h>
     53 #include <stdlib.h>
     54 #include <signal.h>
     55 #include <string.h>
     56 #include <ctype.h>
     57 #include <unistd.h>
     58 #include <time.h>
     59 #include <stdarg.h>
     60 #include <sys/stat.h>
     61 #include <sys/param.h>
     62 #include <sys/types.h>
     63 #include <sys/sysmacros.h>
     64 #ifdef CRAY
     65 #include <sys/iosw.h>
     66 #endif
     67 #ifdef sgi
     68 #include <aio.h>		/* for aio_read,write */
     69 #include <inttypes.h>		/* for uint64_t type */
     70 #include <siginfo.h>		/* signal handlers & SA_SIGINFO */
     71 #endif
     72 #ifndef CRAY
     73 #include <sys/uio.h>		/* for struct iovec (readv) */
     74 #include <sys/mman.h>		/* for mmap(2) */
     75 #include <sys/ipc.h>		/* for i/o buffer in shared memory */
     76 #include <sys/shm.h>		/* for i/o buffer in shared memory */
     77 #endif
     78 #include <sys/wait.h>
     79 #ifdef CRAY
     80 #include <sys/listio.h>
     81 #include <sys/panic.h>
     82 #endif
     83 #include <sys/time.h>		/* for delays */
     84 
     85 #include "doio.h"
     86 #include "write_log.h"
     87 #include "random_range.h"
     88 #include "string_to_tokens.h"
     89 #include "pattern.h"
     90 
     91 #define	NMEMALLOC	32
     92 #define	MEM_DATA	1	/* data space                           */
     93 #define	MEM_SHMEM	2	/* System V shared memory               */
     94 #define	MEM_T3ESHMEM	3	/* T3E Shared Memory                    */
     95 #define	MEM_MMAP	4	/* mmap(2)                              */
     96 
     97 #define	MEMF_PRIVATE	0001
     98 #define	MEMF_AUTORESRV	0002
     99 #define	MEMF_LOCAL	0004
    100 #define	MEMF_SHARED	0010
    101 
    102 #define	MEMF_FIXADDR	0100
    103 #define	MEMF_ADDR	0200
    104 #define	MEMF_AUTOGROW	0400
    105 #define	MEMF_FILE	01000	/* regular file -- unlink on close      */
    106 #define	MEMF_MPIN	010000	/* use mpin(2) to lock pages in memory */
    107 
    108 struct memalloc {
    109 	int memtype;
    110 	int flags;
    111 	int nblks;
    112 	char *name;
    113 	void *space;		/* memory address of allocated space */
    114 	int fd;			/* FD open for mmaping */
    115 	int size;
    116 } Memalloc[NMEMALLOC];
    117 
    118 /*
    119  * Structure for maintaining open file test descriptors.  Used by
    120  * alloc_fd().
    121  */
    122 
    123 struct fd_cache {
    124 	char c_file[MAX_FNAME_LENGTH + 1];
    125 	int c_oflags;
    126 	int c_fd;
    127 	long c_rtc;
    128 #ifdef sgi
    129 	int c_memalign;		/* from F_DIOINFO */
    130 	int c_miniosz;
    131 	int c_maxiosz;
    132 #endif
    133 #ifndef CRAY
    134 	void *c_memaddr;	/* mmapped address */
    135 	int c_memlen;		/* length of above region */
    136 #endif
    137 };
    138 
    139 /*
    140  * Name-To-Value map
    141  * Used to map cmdline arguments to values
    142  */
    143 struct smap {
    144 	char *string;
    145 	int value;
    146 };
    147 
    148 struct aio_info {
    149 	int busy;
    150 	int id;
    151 	int fd;
    152 	int strategy;
    153 	volatile int done;
    154 #ifdef CRAY
    155 	struct iosw iosw;
    156 #endif
    157 #ifdef sgi
    158 	aiocb_t aiocb;
    159 	int aio_ret;		/* from aio_return */
    160 	int aio_errno;		/* from aio_error */
    161 #endif
    162 	int sig;
    163 	int signalled;
    164 	struct sigaction osa;
    165 };
    166 
    167 /* ---------------------------------------------------------------------------
    168  *
    169  * A new paradigm of doing the r/w system call where there is a "stub"
    170  * function that builds the info for the system call, then does the system
    171  * call; this is called by code that is common to all system calls and does
    172  * the syscall return checking, async I/O wait, iosw check, etc.
    173  *
    174  * Flags:
    175  *	WRITE, ASYNC, SSD/SDS,
    176  *	FILE_LOCK, WRITE_LOG, VERIFY_DATA,
    177  */
    178 
    179 struct status {
    180 	int rval;		/* syscall return */
    181 	int err;		/* errno */
    182 	int *aioid;		/* list of async I/O structures */
    183 };
    184 
    185 struct syscall_info {
    186 	char *sy_name;
    187 	int sy_type;
    188 	struct status *(*sy_syscall) ();
    189 	int (*sy_buffer) ();
    190 	char *(*sy_format) ();
    191 	int sy_flags;
    192 	int sy_bits;
    193 };
    194 
    195 #define	SY_WRITE		00001
    196 #define	SY_ASYNC		00010
    197 #define	SY_IOSW			00020
    198 #define	SY_SDS			00100
    199 
    200 #ifndef O_SSD
    201 #define O_SSD 0			/* so code compiles on a CRAY2 */
    202 #endif
    203 
    204 #ifdef sgi
    205 #define UINT64_T uint64_t
    206 #else
    207 #define UINT64_T unsigned long
    208 #endif
    209 
    210 #ifndef O_PARALLEL
    211 #define O_PARALLEL 0		/* so O_PARALLEL may be used in expressions */
    212 #endif
    213 
    214 #define PPID_CHECK_INTERVAL 5	/* check ppid every <-- iterations */
    215 #define	MAX_AIO		256	/* maximum number of async I/O ops */
    216 #ifdef _CRAYMPP
    217 #define	MPP_BUMP	16	/* page un-alignment for MPP */
    218 #else
    219 #define	MPP_BUMP	0
    220 #endif
    221 
    222 #define	SYSERR strerror(errno)
    223 
    224 /*
    225  * getopt() string of supported cmdline arguments.
    226  */
    227 
    228 #define OPTS	"aC:d:ehm:n:kr:w:vU:V:M:N:"
    229 
    230 #define DEF_RELEASE_INTERVAL	0
    231 
    232 /*
    233  * Flags set in parse_cmdline() to indicate which options were selected
    234  * on the cmdline.
    235  */
    236 
    237 int a_opt = 0;			/* abort on data compare errors     */
    238 int e_opt = 0;			/* exec() after fork()'ing          */
    239 int C_opt = 0;			/* Data Check Type                  */
    240 int d_opt = 0;			/* delay between operations         */
    241 int k_opt = 0;			/* lock file regions during writes  */
    242 int m_opt = 0;			/* generate periodic messages       */
    243 int n_opt = 0;			/* nprocs                           */
    244 int r_opt = 0;			/* resource release interval        */
    245 int w_opt = 0;			/* file write log file              */
    246 int v_opt = 0;			/* verify writes if set             */
    247 int U_opt = 0;			/* upanic() on varios conditions    */
    248 int V_opt = 0;			/* over-ride default validation fd type */
    249 int M_opt = 0;			/* data buffer allocation types     */
    250 char TagName[40];		/* name of this doio (see Monster)  */
    251 
    252 /*
    253  * Misc globals initialized in parse_cmdline()
    254  */
    255 
    256 char *Prog = NULL;		/* set up in parse_cmdline()                */
    257 int Upanic_Conditions;		/* set by args to -U                        */
    258 int Release_Interval;		/* arg to -r                                */
    259 int Nprocs;			/* arg to -n                                */
    260 char *Write_Log;		/* arg to -w                                */
    261 char *Infile;			/* input file (defaults to stdin)           */
    262 int *Children;			/* pids of child procs                      */
    263 int Nchildren = 0;
    264 int Nsiblings = 0;		/* tfork'ed siblings                        */
    265 int Execd = 0;
    266 int Message_Interval = 0;
    267 int Npes = 0;			/* non-zero if built as an mpp multi-pe app */
    268 int Vpe = -1;			/* Virtual pe number if Npes >= 0           */
    269 int Reqno = 1;			/* request # - used in some error messages  */
    270 int Reqskipcnt = 0;		/* count of I/O requests that are skipped   */
    271 int Validation_Flags;
    272 char *(*Data_Check) ();		/* function to call for data checking       */
    273 int (*Data_Fill) ();		/* function to call for data filling        */
    274 int Nmemalloc = 0;		/* number of memory allocation strategies   */
    275 int delayop = 0;		/* delay between operations - type of delay */
    276 int delaytime = 0;		/* delay between operations - how long      */
    277 
    278 struct wlog_file Wlog;
    279 
    280 int active_mmap_rw = 0;		/* Indicates that mmapped I/O is occurring. */
    281 			    /* Used by sigbus_action() in the child doio. */
    282 int havesigint = 0;
    283 
    284 #define SKIP_REQ	-2	/* skip I/O request */
    285 
    286 /*
    287  * Global file descriptors
    288  */
    289 
    290 int Wfd_Append;			/* for appending to the write-log       */
    291 int Wfd_Random;			/* for overlaying write-log entries     */
    292 
    293 #define FD_ALLOC_INCR	32	/* allocate this many fd_map structs    */
    294 				/* at a time */
    295 
    296 /*
    297  * Globals for tracking Sds and Core usage
    298  */
    299 
    300 char *Memptr;			/* ptr to core buffer space             */
    301 int Memsize;			/* # bytes pointed to by Memptr         */
    302 				/* maintained by alloc_mem()            */
    303 
    304 int Sdsptr;			/* sds offset (always 0)                */
    305 int Sdssize;			/* # bytes of allocated sds space       */
    306 				/* Maintained by alloc_sds()            */
    307 char Host[16];
    308 char Pattern[128];
    309 int Pattern_Length;
    310 
    311 /*
    312  * Signal handlers, and related globals
    313  */
    314 
    315 char *syserrno(int err);
    316 void doio(void);
    317 void doio_delay(void);
    318 char *format_oflags(int oflags);
    319 char *format_strat(int strategy);
    320 char *format_rw(struct io_req *ioreq, int fd, void *buffer,
    321 		int signo, char *pattern, void *iosw);
    322 #ifdef CRAY
    323 char *format_sds(struct io_req *ioreq, void *buffer, int sds char *pattern);
    324 #endif /* CRAY */
    325 
    326 int do_read(struct io_req *req);
    327 int do_write(struct io_req *req);
    328 int lock_file_region(char *fname, int fd, int type, int start, int nbytes);
    329 
    330 #ifdef CRAY
    331 char *format_listio(struct io_req *ioreq, int lcmd,
    332 		    struct listreq *list, int nent, int fd, char *pattern);
    333 #endif /* CRAY */
    334 
    335 int do_listio(struct io_req *req);
    336 
    337 #if defined(_CRAY1) || defined(CRAY)
    338 int do_ssdio(struct io_req *req);
    339 #endif /* defined(_CRAY1) || defined(CRAY) */
    340 
    341 char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd);
    342 
    343 #ifdef CRAY
    344 struct status *sy_listio(struct io_req *req, struct syscall_info *sysc,
    345 			 int fd, char *addr);
    346 int listio_mem(struct io_req *req, int offset, int fmstride,
    347 	       int *min, int *max);
    348 char *fmt_listio(struct io_req *req, struct syscall_info *sy,
    349 		 int fd, char *addr);
    350 #endif /* CRAY */
    351 
    352 #ifdef sgi
    353 struct status *sy_pread(struct io_req *req, struct syscall_info *sysc,
    354 			int fd, char *addr);
    355 struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc,
    356 			 int fd, char *addr);
    357 char *fmt_pread(struct io_req *req, struct syscall_info *sy,
    358 		int fd, char *addr);
    359 #endif /* sgi */
    360 
    361 #ifndef CRAY
    362 struct status *sy_readv(struct io_req *req, struct syscall_info *sysc,
    363 			int fd, char *addr);
    364 struct status *sy_writev(struct io_req *req, struct syscall_info *sysc,
    365 			 int fd, char *addr);
    366 struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc,
    367 		      int fd, char *addr, int rw);
    368 char *fmt_readv(struct io_req *req, struct syscall_info *sy,
    369 		int fd, char *addr);
    370 #endif /* !CRAY */
    371 
    372 #ifdef sgi
    373 struct status *sy_aread(struct io_req *req, struct syscall_info *sysc,
    374 			int fd, char *addr);
    375 struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc,
    376 			 int fd, char *addr)
    377 struct status *sy_arw(struct io_req *req, struct syscall_info *sysc,
    378 		      int fd, char *addr, int rw);
    379 char *fmt_aread(struct io_req *req, struct syscall_info *sy,
    380 		int fd, char *addr);
    381 #endif /* sgi */
    382 
    383 #ifndef CRAY
    384 struct status *sy_mmread(struct io_req *req, struct syscall_info *sysc,
    385 			 int fd, char *addr);
    386 struct status *sy_mmwrite(struct io_req *req, struct syscall_info *sysc,
    387 			  int fd, char *addr);
    388 struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc,
    389 		       int fd, char *addr, int rw);
    390 char *fmt_mmrw(struct io_req *req, struct syscall_info *sy, int fd, char *addr);
    391 #endif /* !CRAY */
    392 
    393 int do_rw(struct io_req *req);
    394 
    395 #ifdef sgi
    396 int do_fcntl(struct io_req *req);
    397 #endif /* sgi */
    398 
    399 #ifndef CRAY
    400 int do_sync(struct io_req *req);
    401 #endif /* !CRAY */
    402 
    403 int doio_pat_fill(char *addr, int mem_needed, char *Pattern,
    404 		  int Pattern_Length, int shift);
    405 char *doio_pat_check(char *buf, int offset, int length,
    406 		     char *pattern, int pattern_length, int patshift);
    407 char *check_file(char *file, int offset, int length, char *pattern,
    408 		 int pattern_length, int patshift, int fsa);
    409 int doio_fprintf(FILE * stream, char *format, ...);
    410 int alloc_mem(int nbytes);
    411 
    412 #if defined(_CRAY1) || defined(CRAY)
    413 int alloc_sds(int nbytes);
    414 #endif /* defined(_CRAY1) || defined(CRAY) */
    415 
    416 int alloc_fd(char *file, int oflags);
    417 struct fd_cache *alloc_fdcache(char *file, int oflags);
    418 
    419 #ifdef sgi
    420 void signal_info(int sig, siginfo_t * info, void *v);
    421 void cleanup_handler(int sig, siginfo_t * info, void *v);
    422 void die_handler(int sig, siginfo_t * info, void *v);
    423 void sigbus_handler(int sig, siginfo_t * info, void *v);
    424 #else /* !sgi */
    425 void cleanup_handler(int sig);
    426 void die_handler(int sig);
    427 
    428 #ifndef CRAY
    429 void sigbus_handler(int sig);
    430 #endif /* !CRAY */
    431 #endif /* sgi */
    432 
    433 void noop_handler(int sig);
    434 void sigint_handler(int sig);
    435 void aio_handler(int sig);
    436 void dump_aio(void);
    437 
    438 #ifdef sgi
    439 void cb_handler(sigval_t val);
    440 #endif /* sgi */
    441 
    442 struct aio_info *aio_slot(int aio_id);
    443 int aio_register(int fd, int strategy, int sig);
    444 int aio_unregister(int aio_id);
    445 
    446 #ifndef __linux__
    447 int aio_wait(int aio_id);
    448 #endif /* !__linux__ */
    449 
    450 char *hms(time_t t);
    451 int aio_done(struct aio_info *ainfo);
    452 void doio_upanic(int mask);
    453 int parse_cmdline(int argc, char **argv, char *opts);
    454 
    455 #ifndef CRAY
    456 void parse_memalloc(char *arg);
    457 void dump_memalloc(void);
    458 #endif /* !CRAY */
    459 
    460 void parse_delay(char *arg);
    461 int usage(FILE * stream);
    462 void help(FILE * stream);
    463 
    464 /*
    465  * Upanic conditions, and a map from symbolics to values
    466  */
    467 
    468 #define U_CORRUPTION	0001	/* upanic on data corruption    */
    469 #define U_IOSW	    	0002	/* upanic on bad iosw           */
    470 #define U_RVAL	    	0004	/* upanic on bad rval           */
    471 
    472 #define U_ALL	    	(U_CORRUPTION | U_IOSW | U_RVAL)
    473 
    474 struct smap Upanic_Args[] = {
    475 	{"corruption", U_CORRUPTION},
    476 	{"iosw", U_IOSW},
    477 	{"rval", U_RVAL},
    478 	{"all", U_ALL},
    479 	{NULL, 0}
    480 };
    481 
    482 struct aio_info Aio_Info[MAX_AIO];
    483 
    484 /* -C data-fill/check type */
    485 #define	C_DEFAULT	1
    486 struct smap checkmap[] = {
    487 	{"default", C_DEFAULT},
    488 	{NULL, 0},
    489 };
    490 
    491 /* -d option delay types */
    492 #define	DELAY_SELECT	1
    493 #define	DELAY_SLEEP	2
    494 #define	DELAY_SGINAP	3
    495 #define	DELAY_ALARM	4
    496 #define	DELAY_ITIMER	5	/* POSIX timer                          */
    497 
    498 struct smap delaymap[] = {
    499 	{"select", DELAY_SELECT},
    500 	{"sleep", DELAY_SLEEP},
    501 #ifdef sgi
    502 	{"sginap", DELAY_SGINAP},
    503 #endif
    504 	{"alarm", DELAY_ALARM},
    505 	{NULL, 0},
    506 };
    507 
    508 /******
    509 *
    510 * strerror() does similar actions.
    511 
    512 char *
    513 syserrno(int err)
    514 {
    515     static char sys_errno[10];
    516     sprintf(sys_errno, "%d", errno);
    517     return(sys_errno);
    518 }
    519 
    520 ******/
    521 
    522 int main(int argc, char **argv)
    523 {
    524 	int i, pid, stat, ex_stat;
    525 #ifdef CRAY
    526 	sigset_t omask;
    527 #elif defined(linux)
    528 	sigset_t omask, block_mask;
    529 #else
    530 	int omask;
    531 #endif
    532 	struct sigaction sa;
    533 
    534 	umask(0);		/* force new file modes to known values */
    535 #if _CRAYMPP
    536 	Npes = sysconf(_SC_CRAY_NPES);	/* must do this before parse_cmdline */
    537 	Vpe = sysconf(_SC_CRAY_VPE);
    538 #endif
    539 
    540 	TagName[0] = '\0';
    541 	parse_cmdline(argc, argv, OPTS);
    542 
    543 	random_range_seed(getpid());	/* initialize random number generator */
    544 
    545 	/*
    546 	 * If this is a re-exec of doio, jump directly into the doio function.
    547 	 */
    548 
    549 	if (Execd) {
    550 		doio();
    551 		exit(E_SETUP);
    552 	}
    553 
    554 	/*
    555 	 * Stop on all but a few signals...
    556 	 */
    557 	sigemptyset(&sa.sa_mask);
    558 	sa.sa_handler = sigint_handler;
    559 	sa.sa_flags = SA_RESETHAND;	/* sigint is ignored after the */
    560 	/* first time */
    561 	for (i = 1; i <= NSIG; i++) {
    562 		switch (i) {
    563 #ifdef SIGRECOVERY
    564 		case SIGRECOVERY:
    565 			break;
    566 #endif
    567 #ifdef SIGCKPT
    568 		case SIGCKPT:
    569 #endif
    570 #ifdef SIGRESTART
    571 		case SIGRESTART:
    572 #endif
    573 		case SIGTSTP:
    574 		case SIGSTOP:
    575 		case SIGCONT:
    576 		case SIGCHLD:
    577 		case SIGBUS:
    578 		case SIGSEGV:
    579 		case SIGQUIT:
    580 			break;
    581 		default:
    582 			sigaction(i, &sa, NULL);
    583 		}
    584 	}
    585 
    586 	/*
    587 	 * If we're logging write operations, make a dummy call to wlog_open
    588 	 * to initialize the write history file.  This call must be done in
    589 	 * the parent, to ensure that the history file exists and/or has
    590 	 * been truncated before any children attempt to open it, as the doio
    591 	 * children are not allowed to truncate the file.
    592 	 */
    593 
    594 	if (w_opt) {
    595 		strcpy(Wlog.w_file, Write_Log);
    596 
    597 		if (wlog_open(&Wlog, 1, 0666) < 0) {
    598 			doio_fprintf(stderr,
    599 				     "Could not create/truncate write log %s\n",
    600 				     Write_Log);
    601 			exit(2);
    602 		}
    603 
    604 		wlog_close(&Wlog);
    605 	}
    606 
    607 	/*
    608 	 * Malloc space for the children pid array.  Initialize all entries
    609 	 * to -1.
    610 	 */
    611 
    612 	Children = malloc(sizeof(int) * Nprocs);
    613 	for (i = 0; i < Nprocs; i++) {
    614 		Children[i] = -1;
    615 	}
    616 
    617 	sigemptyset(&block_mask);
    618 	sigaddset(&block_mask, SIGCHLD);
    619 	sigprocmask(SIG_BLOCK, &block_mask, &omask);
    620 
    621 	/*
    622 	 * Fork Nprocs.  This [parent] process is a watchdog, to notify the
    623 	 * invoker of procs which exit abnormally, and to make sure that all
    624 	 * child procs get cleaned up.  If the -e option was used, we will also
    625 	 * re-exec.  This is mostly for unicos/mk on mpp's, to ensure that not
    626 	 * all of the doio's don't end up in the same pe.
    627 	 *
    628 	 * Note - if Nprocs is 1, or this doio is a multi-pe app (Npes > 1),
    629 	 * jump directly to doio().  multi-pe apps can't fork(), and there is
    630 	 * no reason to fork() for 1 proc.
    631 	 */
    632 
    633 	if (Nprocs == 1 || Npes > 1) {
    634 		doio();
    635 		exit(0);
    636 	} else {
    637 		for (i = 0; i < Nprocs; i++) {
    638 			if ((pid = fork()) == -1) {
    639 				doio_fprintf(stderr,
    640 					     "(parent) Could not fork %d children:  %s (%d)\n",
    641 					     i + 1, SYSERR, errno);
    642 				exit(E_SETUP);
    643 			}
    644 
    645 			Children[Nchildren] = pid;
    646 			Nchildren++;
    647 
    648 			if (pid == 0) {
    649 				if (e_opt) {
    650 					char *exec_path;
    651 
    652 					exec_path = argv[0];
    653 					argv[0] = malloc(strlen(exec_path) + 2);
    654 					sprintf(argv[0], "-%s", exec_path);
    655 
    656 					execvp(exec_path, argv);
    657 					doio_fprintf(stderr,
    658 						     "(parent) Could not execvp %s:  %s (%d)\n",
    659 						     exec_path, SYSERR, errno);
    660 					exit(E_SETUP);
    661 				} else {
    662 					doio();
    663 					exit(E_SETUP);
    664 				}
    665 			}
    666 		}
    667 
    668 		/*
    669 		 * Parent spins on wait(), until all children exit.
    670 		 */
    671 
    672 		ex_stat = E_NORMAL;
    673 
    674 		while (Nprocs) {
    675 			if ((pid = wait(&stat)) == -1) {
    676 				if (errno == EINTR)
    677 					continue;
    678 			}
    679 
    680 			for (i = 0; i < Nchildren; i++)
    681 				if (Children[i] == pid)
    682 					Children[i] = -1;
    683 
    684 			Nprocs--;
    685 
    686 			if (WIFEXITED(stat)) {
    687 				switch (WEXITSTATUS(stat)) {
    688 				case E_NORMAL:
    689 					/* noop */
    690 					break;
    691 
    692 				case E_INTERNAL:
    693 					doio_fprintf(stderr,
    694 						     "(parent) pid %d exited because of an internal error\n",
    695 						     pid);
    696 					ex_stat |= E_INTERNAL;
    697 					break;
    698 
    699 				case E_SETUP:
    700 					doio_fprintf(stderr,
    701 						     "(parent) pid %d exited because of a setup error\n",
    702 						     pid);
    703 					ex_stat |= E_SETUP;
    704 					break;
    705 
    706 				case E_COMPARE:
    707 					doio_fprintf(stderr,
    708 						     "(parent) pid %d exited because of data compare errors\n",
    709 						     pid);
    710 
    711 					ex_stat |= E_COMPARE;
    712 
    713 					if (a_opt)
    714 						kill(0, SIGINT);
    715 
    716 					break;
    717 
    718 				case E_USAGE:
    719 					doio_fprintf(stderr,
    720 						     "(parent) pid %d exited because of a usage error\n",
    721 						     pid);
    722 
    723 					ex_stat |= E_USAGE;
    724 					break;
    725 
    726 				default:
    727 					doio_fprintf(stderr,
    728 						     "(parent) pid %d exited with unknown status %d\n",
    729 						     pid, WEXITSTATUS(stat));
    730 					ex_stat |= E_INTERNAL;
    731 					break;
    732 				}
    733 			} else if (WIFSIGNALED(stat)
    734 				   && WTERMSIG(stat) != SIGINT) {
    735 				doio_fprintf(stderr,
    736 					     "(parent) pid %d terminated by signal %d\n",
    737 					     pid, WTERMSIG(stat));
    738 
    739 				ex_stat |= E_SIGNAL;
    740 			}
    741 
    742 			fflush(NULL);
    743 		}
    744 	}
    745 
    746 	exit(ex_stat);
    747 
    748 }				/* main */
    749 
    750 /*
    751  * main doio function.  Each doio child starts here, and never returns.
    752  */
    753 
    754 void doio(void)
    755 {
    756 	int rval, i, infd, nbytes;
    757 	char *cp;
    758 	struct io_req ioreq;
    759 	struct sigaction sa, def_action, ignore_action, exit_action;
    760 #ifndef CRAY
    761 	struct sigaction sigbus_action;
    762 #endif
    763 
    764 	Memsize = Sdssize = 0;
    765 
    766 	/*
    767 	 * Initialize the Pattern - write-type syscalls will replace Pattern[1]
    768 	 * with the pattern passed in the request.  Make sure that
    769 	 * strlen(Pattern) is not mod 16 so that out of order words will be
    770 	 * detected.
    771 	 */
    772 
    773 	gethostname(Host, sizeof(Host));
    774 	if ((cp = strchr(Host, '.')) != NULL)
    775 		*cp = '\0';
    776 
    777 	Pattern_Length = sprintf(Pattern, "-:%d:%s:%s*", getpid(), Host, Prog);
    778 
    779 	if (!(Pattern_Length % 16)) {
    780 		Pattern_Length = sprintf(Pattern, "-:%d:%s:%s**",
    781 					 getpid(), Host, Prog);
    782 	}
    783 
    784 	/*
    785 	 * Open a couple of descriptors for the write-log file.  One descriptor
    786 	 * is for appending, one for random access.  Write logging is done for
    787 	 * file corruption detection.  The program doio_check is capable of
    788 	 * doing corruption detection based on a doio write-log.
    789 	 */
    790 
    791 	if (w_opt) {
    792 
    793 		strcpy(Wlog.w_file, Write_Log);
    794 
    795 		if (wlog_open(&Wlog, 0, 0666) == -1) {
    796 			doio_fprintf(stderr,
    797 				     "Could not open write log file (%s): wlog_open() failed\n",
    798 				     Write_Log);
    799 			exit(E_SETUP);
    800 		}
    801 	}
    802 
    803 	/*
    804 	 * Open the input stream - either a file or stdin
    805 	 */
    806 
    807 	if (Infile == NULL) {
    808 		infd = 0;
    809 	} else {
    810 		if ((infd = open(Infile, O_RDWR)) == -1) {
    811 			doio_fprintf(stderr,
    812 				     "Could not open input file (%s):  %s (%d)\n",
    813 				     Infile, SYSERR, errno);
    814 			exit(E_SETUP);
    815 		}
    816 	}
    817 
    818 	/*
    819 	 * Define a set of signals that should never be masked.  Receipt of
    820 	 * these signals generally indicates a programming error, and we want
    821 	 * a corefile at the point of error.  We put SIGQUIT in this list so
    822 	 * that ^\ will force a user core dump.
    823 	 *
    824 	 * Note:  the handler for these should be SIG_DFL, all of them
    825 	 * produce a corefile as the default action.
    826 	 */
    827 
    828 	ignore_action.sa_handler = SIG_IGN;
    829 	ignore_action.sa_flags = 0;
    830 	sigemptyset(&ignore_action.sa_mask);
    831 
    832 	def_action.sa_handler = SIG_DFL;
    833 	def_action.sa_flags = 0;
    834 	sigemptyset(&def_action.sa_mask);
    835 
    836 #ifdef sgi
    837 	exit_action.sa_sigaction = cleanup_handler;
    838 	exit_action.sa_flags = SA_SIGINFO;
    839 	sigemptyset(&exit_action.sa_mask);
    840 
    841 	sa.sa_sigaction = die_handler;
    842 	sa.sa_flags = SA_SIGINFO;
    843 	sigemptyset(&sa.sa_mask);
    844 
    845 	sigbus_action.sa_sigaction = sigbus_handler;
    846 	sigbus_action.sa_flags = SA_SIGINFO;
    847 	sigemptyset(&sigbus_action.sa_mask);
    848 #else
    849 	exit_action.sa_handler = cleanup_handler;
    850 	exit_action.sa_flags = 0;
    851 	sigemptyset(&exit_action.sa_mask);
    852 
    853 	sa.sa_handler = die_handler;
    854 	sa.sa_flags = 0;
    855 	sigemptyset(&sa.sa_mask);
    856 
    857 #ifndef CRAY
    858 	sigbus_action.sa_handler = sigbus_handler;
    859 	sigbus_action.sa_flags = 0;
    860 	sigemptyset(&sigbus_action.sa_mask);
    861 #endif
    862 #endif
    863 
    864 	for (i = 1; i <= NSIG; i++) {
    865 		switch (i) {
    866 			/* Signals to terminate program on */
    867 		case SIGINT:
    868 			sigaction(i, &exit_action, NULL);
    869 			break;
    870 
    871 #ifndef CRAY
    872 			/* This depends on active_mmap_rw */
    873 		case SIGBUS:
    874 			sigaction(i, &sigbus_action, NULL);
    875 			break;
    876 #endif
    877 
    878 			/* Signals to Ignore... */
    879 		case SIGSTOP:
    880 		case SIGCONT:
    881 #ifdef SIGRECOVERY
    882 		case SIGRECOVERY:
    883 #endif
    884 			sigaction(i, &ignore_action, NULL);
    885 			break;
    886 
    887 			/* Signals to trap & report & die */
    888 			/*case SIGTRAP: */
    889 			/*case SIGABRT: */
    890 #ifdef SIGERR			/* cray only signals */
    891 		case SIGERR:
    892 		case SIGBUFIO:
    893 		case SIGINFO:
    894 #endif
    895 			/*case SIGFPE: */
    896 		case SIGURG:
    897 		case SIGHUP:
    898 		case SIGTERM:
    899 		case SIGPIPE:
    900 		case SIGIO:
    901 		case SIGUSR1:
    902 		case SIGUSR2:
    903 			sigaction(i, &sa, NULL);
    904 			break;
    905 
    906 			/* Default Action for all other signals */
    907 		default:
    908 			sigaction(i, &def_action, NULL);
    909 			break;
    910 		}
    911 	}
    912 
    913 	/*
    914 	 * Main loop - each doio proc does this until the read returns eof (0).
    915 	 * Call the appropriate io function based on the request type.
    916 	 */
    917 
    918 	while ((nbytes = read(infd, (char *)&ioreq, sizeof(ioreq)))) {
    919 
    920 		/*
    921 		 * Periodically check our ppid.  If it is 1, the child exits to
    922 		 * help clean up in the case that the main doio process was
    923 		 * killed.
    924 		 */
    925 
    926 		if (Reqno && ((Reqno % PPID_CHECK_INTERVAL) == 0)) {
    927 			if (getppid() == 1) {
    928 				doio_fprintf(stderr,
    929 					     "Parent doio process has exited\n");
    930 				alloc_mem(-1);
    931 				exit(E_SETUP);
    932 			}
    933 		}
    934 
    935 		if (nbytes == -1) {
    936 			doio_fprintf(stderr,
    937 				     "read of %d bytes from input failed:  %s (%d)\n",
    938 				     sizeof(ioreq), SYSERR, errno);
    939 			alloc_mem(-1);
    940 			exit(E_SETUP);
    941 		}
    942 
    943 		if (nbytes != sizeof(ioreq)) {
    944 			doio_fprintf(stderr,
    945 				     "read wrong # bytes from input stream, expected %d, got %d\n",
    946 				     sizeof(ioreq), nbytes);
    947 			alloc_mem(-1);
    948 			exit(E_SETUP);
    949 		}
    950 
    951 		if (ioreq.r_magic != DOIO_MAGIC) {
    952 			doio_fprintf(stderr,
    953 				     "got a bad magic # from input stream.  Expected 0%o, got 0%o\n",
    954 				     DOIO_MAGIC, ioreq.r_magic);
    955 			alloc_mem(-1);
    956 			exit(E_SETUP);
    957 		}
    958 
    959 		/*
     960 		 * If we're on a Release_Interval multiple, release all ssd and
    961 		 * core space, and close all fd's in Fd_Map[].
    962 		 */
    963 
    964 		if (Reqno && Release_Interval && !(Reqno % Release_Interval)) {
    965 			if (Memsize) {
    966 #ifdef NOTDEF
    967 				sbrk(-1 * Memsize);
    968 #else
    969 				alloc_mem(-1);
    970 #endif
    971 			}
    972 #ifdef _CRAY1
    973 			if (Sdssize) {
    974 				ssbreak(-1 * btoc(Sdssize));
    975 				Sdsptr = 0;
    976 				Sdssize = 0;
    977 			}
    978 #endif /* _CRAY1 */
    979 
    980 			alloc_fd(NULL, 0);
    981 		}
    982 
    983 		switch (ioreq.r_type) {
    984 		case READ:
    985 		case READA:
    986 			rval = do_read(&ioreq);
    987 			break;
    988 
    989 		case WRITE:
    990 		case WRITEA:
    991 			rval = do_write(&ioreq);
    992 			break;
    993 
    994 		case READV:
    995 		case AREAD:
    996 		case PREAD:
    997 		case LREAD:
    998 		case LREADA:
    999 		case LSREAD:
   1000 		case LSREADA:
   1001 		case WRITEV:
   1002 		case AWRITE:
   1003 		case PWRITE:
   1004 		case MMAPR:
   1005 		case MMAPW:
   1006 		case LWRITE:
   1007 		case LWRITEA:
   1008 		case LSWRITE:
   1009 		case LSWRITEA:
   1010 		case LEREAD:
   1011 		case LEREADA:
   1012 		case LEWRITE:
   1013 		case LEWRITEA:
   1014 			rval = do_rw(&ioreq);
   1015 			break;
   1016 
   1017 #ifdef CRAY
   1018 		case SSREAD:
   1019 		case SSWRITE:
   1020 			rval = do_ssdio(&ioreq);
   1021 			break;
   1022 
   1023 		case LISTIO:
   1024 			rval = do_listio(&ioreq);
   1025 			break;
   1026 #endif
   1027 
   1028 #ifdef sgi
   1029 		case RESVSP:
   1030 		case UNRESVSP:
   1031 #ifdef F_FSYNC
   1032 		case DFFSYNC:
   1033 #endif
   1034 			rval = do_fcntl(&ioreq);
   1035 			break;
   1036 #endif /* sgi */
   1037 
   1038 #ifndef CRAY
   1039 		case FSYNC2:
   1040 		case FDATASYNC:
   1041 			rval = do_sync(&ioreq);
   1042 			break;
   1043 #endif
   1044 		default:
   1045 			doio_fprintf(stderr,
   1046 				     "Don't know how to handle io request type %d\n",
   1047 				     ioreq.r_type);
   1048 			alloc_mem(-1);
   1049 			exit(E_SETUP);
   1050 		}
   1051 
   1052 		if (rval == SKIP_REQ) {
   1053 			Reqskipcnt++;
   1054 		} else if (rval != 0) {
   1055 			alloc_mem(-1);
   1056 			doio_fprintf(stderr,
   1057 				     "doio(): operation %d returned != 0\n",
   1058 				     ioreq.r_type);
   1059 			exit(E_SETUP);
   1060 		}
   1061 
   1062 		if (Message_Interval && Reqno % Message_Interval == 0) {
   1063 			doio_fprintf(stderr,
   1064 				     "Info:  %d requests done (%d skipped) by this process\n",
   1065 				     Reqno, Reqskipcnt);
   1066 		}
   1067 
   1068 		Reqno++;
   1069 
   1070 		if (delayop != 0)
   1071 			doio_delay();
   1072 	}
   1073 
   1074 	/*
   1075 	 * Child exits normally
   1076 	 */
   1077 	alloc_mem(-1);
   1078 	exit(E_NORMAL);
   1079 
   1080 }				/* doio */
   1081 
   1082 void doio_delay(void)
   1083 {
   1084 	struct timeval tv_delay;
   1085 	struct sigaction sa_al, sa_old;
   1086 	sigset_t al_mask;
   1087 
   1088 	switch (delayop) {
   1089 	case DELAY_SELECT:
   1090 		tv_delay.tv_sec = delaytime / 1000000;
   1091 		tv_delay.tv_usec = delaytime % 1000000;
   1092 		/*doio_fprintf(stdout, "delay_select: %d %d\n",
   1093 		   tv_delay.tv_sec, tv_delay.tv_usec); */
   1094 		select(0, NULL, NULL, NULL, &tv_delay);
   1095 		break;
   1096 
   1097 	case DELAY_SLEEP:
   1098 		sleep(delaytime);
   1099 		break;
   1100 
   1101 #ifdef sgi
   1102 	case DELAY_SGINAP:
   1103 		sginap(delaytime);
   1104 		break;
   1105 #endif
   1106 
   1107 	case DELAY_ALARM:
   1108 		sa_al.sa_flags = 0;
   1109 		sa_al.sa_handler = noop_handler;
   1110 		sigemptyset(&sa_al.sa_mask);
   1111 		sigaction(SIGALRM, &sa_al, &sa_old);
   1112 		sigemptyset(&al_mask);
   1113 		alarm(delaytime);
   1114 		sigsuspend(&al_mask);
   1115 		sigaction(SIGALRM, &sa_old, 0);
   1116 		break;
   1117 	}
   1118 }
   1119 
   1120 /*
   1121  * Format IO requests, returning a pointer to the formatted text.
   1122  *
   1123  * format_strat	- formats the async i/o completion strategy
   1124  * format_rw	- formats a read[a]/write[a] request
   1125  * format_sds	- formats a ssread/sswrite request
   1126  * format_listio- formats a listio request
   1127  *
   1128  * ioreq is the doio io request structure.
   1129  */
   1130 
   1131 struct smap sysnames[] = {
   1132 	{"READ", READ},
   1133 	{"WRITE", WRITE},
   1134 	{"READA", READA},
   1135 	{"WRITEA", WRITEA},
   1136 	{"SSREAD", SSREAD},
   1137 	{"SSWRITE", SSWRITE},
   1138 	{"LISTIO", LISTIO},
   1139 	{"LREAD", LREAD},
   1140 	{"LREADA", LREADA},
   1141 	{"LWRITE", LWRITE},
   1142 	{"LWRITEA", LWRITEA},
   1143 	{"LSREAD", LSREAD},
   1144 	{"LSREADA", LSREADA},
   1145 	{"LSWRITE", LSWRITE},
   1146 	{"LSWRITEA", LSWRITEA},
   1147 
   1148 	/* Irix System Calls */
   1149 	{"PREAD", PREAD},
   1150 	{"PWRITE", PWRITE},
   1151 	{"AREAD", AREAD},
   1152 	{"AWRITE", AWRITE},
   1153 	{"LLREAD", LLREAD},
   1154 	{"LLAREAD", LLAREAD},
   1155 	{"LLWRITE", LLWRITE},
   1156 	{"LLAWRITE", LLAWRITE},
   1157 	{"RESVSP", RESVSP},
   1158 	{"UNRESVSP", UNRESVSP},
   1159 	{"DFFSYNC", DFFSYNC},
   1160 
   1161 	/* Irix and Linux System Calls */
   1162 	{"READV", READV},
   1163 	{"WRITEV", WRITEV},
   1164 	{"MMAPR", MMAPR},
   1165 	{"MMAPW", MMAPW},
   1166 	{"FSYNC2", FSYNC2},
   1167 	{"FDATASYNC", FDATASYNC},
   1168 
   1169 	{"unknown", -1},
   1170 };
   1171 
   1172 struct smap aionames[] = {
   1173 	{"poll", A_POLL},
   1174 	{"signal", A_SIGNAL},
   1175 	{"recall", A_RECALL},
   1176 	{"recalla", A_RECALLA},
   1177 	{"recalls", A_RECALLS},
   1178 	{"suspend", A_SUSPEND},
   1179 	{"callback", A_CALLBACK},
   1180 	{"synch", 0},
   1181 	{"unknown", -1},
   1182 };
   1183 
   1184 char *format_oflags(int oflags)
   1185 {
   1186 	char flags[255];
   1187 
   1188 	flags[0] = '\0';
   1189 	switch (oflags & 03) {
   1190 	case O_RDONLY:
   1191 		strcat(flags, "O_RDONLY,");
   1192 		break;
   1193 	case O_WRONLY:
   1194 		strcat(flags, "O_WRONLY,");
   1195 		break;
   1196 	case O_RDWR:
   1197 		strcat(flags, "O_RDWR,");
   1198 		break;
   1199 	default:
   1200 		strcat(flags, "O_weird");
   1201 		break;
   1202 	}
   1203 
   1204 	if (oflags & O_EXCL)
   1205 		strcat(flags, "O_EXCL,");
   1206 
   1207 	if (oflags & O_SYNC)
   1208 		strcat(flags, "O_SYNC,");
   1209 #ifdef CRAY
   1210 	if (oflags & O_RAW)
   1211 		strcat(flags, "O_RAW,");
   1212 	if (oflags & O_WELLFORMED)
   1213 		strcat(flags, "O_WELLFORMED,");
   1214 #ifdef O_SSD
   1215 	if (oflags & O_SSD)
   1216 		strcat(flags, "O_SSD,");
   1217 #endif
   1218 	if (oflags & O_LDRAW)
   1219 		strcat(flags, "O_LDRAW,");
   1220 	if (oflags & O_PARALLEL)
   1221 		strcat(flags, "O_PARALLEL,");
   1222 	if (oflags & O_BIG)
   1223 		strcat(flags, "O_BIG,");
   1224 	if (oflags & O_PLACE)
   1225 		strcat(flags, "O_PLACE,");
   1226 	if (oflags & O_ASYNC)
   1227 		strcat(flags, "O_ASYNC,");
   1228 #endif
   1229 
   1230 #ifdef sgi
   1231 	if (oflags & O_DIRECT)
   1232 		strcat(flags, "O_DIRECT,");
   1233 	if (oflags & O_DSYNC)
   1234 		strcat(flags, "O_DSYNC,");
   1235 	if (oflags & O_RSYNC)
   1236 		strcat(flags, "O_RSYNC,");
   1237 #endif
   1238 
   1239 	return (strdup(flags));
   1240 }
   1241 
   1242 char *format_strat(int strategy)
   1243 {
   1244 	char msg[64];
   1245 	char *aio_strat;
   1246 
   1247 	switch (strategy) {
   1248 	case A_POLL:
   1249 		aio_strat = "POLL";
   1250 		break;
   1251 	case A_SIGNAL:
   1252 		aio_strat = "SIGNAL";
   1253 		break;
   1254 	case A_RECALL:
   1255 		aio_strat = "RECALL";
   1256 		break;
   1257 	case A_RECALLA:
   1258 		aio_strat = "RECALLA";
   1259 		break;
   1260 	case A_RECALLS:
   1261 		aio_strat = "RECALLS";
   1262 		break;
   1263 	case A_SUSPEND:
   1264 		aio_strat = "SUSPEND";
   1265 		break;
   1266 	case A_CALLBACK:
   1267 		aio_strat = "CALLBACK";
   1268 		break;
   1269 	case 0:
   1270 		aio_strat = "<zero>";
   1271 		break;
   1272 	default:
   1273 		sprintf(msg, "<error:%#o>", strategy);
   1274 		aio_strat = strdup(msg);
   1275 		break;
   1276 	}
   1277 
   1278 	return (aio_strat);
   1279 }
   1280 
   1281 char *format_rw(struct io_req *ioreq, int fd, void *buffer, int signo,
   1282 		char *pattern, void *iosw)
   1283 {
   1284 	static char *errbuf = NULL;
   1285 	char *aio_strat, *cp;
   1286 	struct read_req *readp = &ioreq->r_data.read;
   1287 	struct write_req *writep = &ioreq->r_data.write;
   1288 	struct read_req *readap = &ioreq->r_data.read;
   1289 	struct write_req *writeap = &ioreq->r_data.write;
   1290 
   1291 	if (errbuf == NULL)
   1292 		errbuf = malloc(32768);
   1293 
   1294 	cp = errbuf;
   1295 	cp += sprintf(cp, "Request number %d\n", Reqno);
   1296 
   1297 	switch (ioreq->r_type) {
   1298 	case READ:
   1299 		cp += sprintf(cp, "syscall:  read(%d, %#lo, %d)\n",
   1300 			      fd, (unsigned long)buffer, readp->r_nbytes);
   1301 		cp +=
   1302 		    sprintf(cp,
   1303 			    "          fd %d is file %s - open flags are %#o\n",
   1304 			    fd, readp->r_file, readp->r_oflags);
   1305 		cp +=
   1306 		    sprintf(cp, "          read done at file offset %d\n",
   1307 			    readp->r_offset);
   1308 		break;
   1309 
   1310 	case WRITE:
   1311 		cp += sprintf(cp, "syscall:  write(%d, %#lo, %d)\n",
   1312 			      fd, (unsigned long)buffer, writep->r_nbytes);
   1313 		cp +=
   1314 		    sprintf(cp,
   1315 			    "          fd %d is file %s - open flags are %#o\n",
   1316 			    fd, writep->r_file, writep->r_oflags);
   1317 		cp +=
   1318 		    sprintf(cp,
   1319 			    "          write done at file offset %d - pattern is %s\n",
   1320 			    writep->r_offset, pattern);
   1321 		break;
   1322 
   1323 	case READA:
   1324 		aio_strat = format_strat(readap->r_aio_strat);
   1325 
   1326 		cp += sprintf(cp, "syscall:  reada(%d, %#lo, %d, %#lo, %d)\n",
   1327 			      fd, (unsigned long)buffer, readap->r_nbytes,
   1328 			      (unsigned long)iosw, signo);
   1329 		cp +=
   1330 		    sprintf(cp,
   1331 			    "          fd %d is file %s - open flags are %#o\n",
   1332 			    fd, readap->r_file, readp->r_oflags);
   1333 		cp +=
   1334 		    sprintf(cp, "          reada done at file offset %d\n",
   1335 			    readap->r_offset);
   1336 		cp +=
   1337 		    sprintf(cp,
   1338 			    "          async io completion strategy is %s\n",
   1339 			    aio_strat);
   1340 		break;
   1341 
   1342 	case WRITEA:
   1343 		aio_strat = format_strat(writeap->r_aio_strat);
   1344 
   1345 		cp += sprintf(cp, "syscall:  writea(%d, %#lo, %d, %#lo, %d)\n",
   1346 			      fd, (unsigned long)buffer, writeap->r_nbytes,
   1347 			      (unsigned long)iosw, signo);
   1348 		cp +=
   1349 		    sprintf(cp,
   1350 			    "          fd %d is file %s - open flags are %#o\n",
   1351 			    fd, writeap->r_file, writeap->r_oflags);
   1352 		cp +=
   1353 		    sprintf(cp,
   1354 			    "          writea done at file offset %d - pattern is %s\n",
   1355 			    writeap->r_offset, pattern);
   1356 		cp +=
   1357 		    sprintf(cp,
   1358 			    "          async io completion strategy is %s\n",
   1359 			    aio_strat);
   1360 		break;
   1361 
   1362 	}
   1363 
   1364 	return errbuf;
   1365 }
   1366 
   1367 #ifdef CRAY
   1368 char *format_sds(struct io_req *ioreq, void *buffer, int sds, char *pattern)
   1369 {
   1370 	int i;
   1371 	static char *errbuf = NULL;
   1372 	char *cp;
   1373 
   1374 	struct ssread_req *ssreadp = &ioreq->r_data.ssread;
   1375 	struct sswrite_req *sswritep = &ioreq->r_data.sswrite;
   1376 
   1377 	if (errbuf == NULL)
   1378 		errbuf = malloc(32768);
   1379 
   1380 	cp = errbuf;
   1381 	cp += sprintf(cp, "Request number %d\n", Reqno);
   1382 
   1383 	switch (ioreq->r_type) {
   1384 	case SSREAD:
   1385 		cp += sprintf(cp, "syscall:  ssread(%#o, %#o, %d)\n",
   1386 			      buffer, sds, ssreadp->r_nbytes);
   1387 		break;
   1388 
   1389 	case SSWRITE:
   1390 		cp +=
   1391 		    sprintf(cp,
   1392 			    "syscall:  sswrite(%#o, %#o, %d) - pattern was %s\n",
   1393 			    buffer, sds, sswritep->r_nbytes, pattern);
   1394 		break;
   1395 	}
   1396 	return errbuf;
   1397 }
   1398 #endif /* CRAY */
   1399 
   1400 /*
   1401  * Perform the various sorts of disk reads
   1402  */
   1403 
   1404 int do_read(struct io_req *req)
   1405 {
   1406 	int fd, offset, nbytes, oflags, rval;
   1407 	char *addr, *file;
   1408 #ifdef CRAY
   1409 	struct aio_info *aiop;
   1410 	int aio_id, aio_strat, signo;
   1411 #endif
   1412 #ifdef sgi
   1413 	struct fd_cache *fdc;
   1414 #endif
   1415 
   1416 	/*
   1417 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
   1418 	 * r_nbytes are at the same offset in the read_req and reada_req
   1419 	 * structures.
   1420 	 */
   1421 
   1422 	file = req->r_data.read.r_file;
   1423 	oflags = req->r_data.read.r_oflags;
   1424 	offset = req->r_data.read.r_offset;
   1425 	nbytes = req->r_data.read.r_nbytes;
   1426 
   1427 	/*printf("read: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */
   1428 
   1429 	/*
   1430 	 * Grab an open file descriptor
   1431 	 * Note: must be done before memory allocation so that the direct i/o
   1432 	 *      information is available in mem. allocate
   1433 	 */
   1434 
   1435 	if ((fd = alloc_fd(file, oflags)) == -1)
   1436 		return -1;
   1437 
   1438 	/*
   1439 	 * Allocate core or sds - based on the O_SSD flag
   1440 	 */
   1441 
   1442 #ifndef wtob
   1443 #define wtob(x)	(x * sizeof(UINT64_T))
   1444 #endif
   1445 
   1446 #ifdef CRAY
   1447 	if (oflags & O_SSD) {
   1448 		if (alloc_sds(nbytes) == -1)
   1449 			return -1;
   1450 
   1451 		addr = (char *)Sdsptr;
   1452 	} else {
   1453 		if ((rval =
   1454 		     alloc_mem(nbytes + wtob(1) * 2 +
   1455 			       MPP_BUMP * sizeof(UINT64_T))) < 0) {
   1456 			return rval;
   1457 		}
   1458 
   1459 		addr = Memptr;
   1460 
   1461 		/*
   1462 		 * if io is not raw, bump the offset by a random amount
   1463 		 * to generate non-word-aligned io.
   1464 		 */
   1465 		if (!(req->r_data.read.r_uflags & F_WORD_ALIGNED)) {
   1466 			addr += random_range(0, wtob(1) - 1, 1, NULL);
   1467 		}
   1468 	}
   1469 #else
   1470 #ifdef sgi
   1471 	/* get memory alignment for using DIRECT I/O */
   1472 	fdc = alloc_fdcache(file, oflags);
   1473 
   1474 	if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) {
   1475 		return rval;
   1476 	}
   1477 
   1478 	addr = Memptr;
   1479 
   1480 	if ((req->r_data.read.r_uflags & F_WORD_ALIGNED)) {
   1481 		/*
   1482 		 * Force memory alignment for Direct I/O
   1483 		 */
   1484 		if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
   1485 			addr +=
   1486 			    fdc->c_memalign - ((long)addr % fdc->c_memalign);
   1487 		}
   1488 	} else {
   1489 		addr += random_range(0, wtob(1) - 1, 1, NULL);
   1490 	}
   1491 #else
   1492 	/* what is !CRAY && !sgi ? */
   1493 	if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) {
   1494 		return rval;
   1495 	}
   1496 
   1497 	addr = Memptr;
   1498 #endif /* !CRAY && sgi */
   1499 #endif /* CRAY */
   1500 
   1501 	switch (req->r_type) {
   1502 	case READ:
   1503 		/* move to the desired file position. */
   1504 		if (lseek(fd, offset, SEEK_SET) == -1) {
   1505 			doio_fprintf(stderr,
   1506 				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
   1507 				     fd, offset, SYSERR, errno);
   1508 			return -1;
   1509 		}
   1510 
   1511 		if ((rval = read(fd, addr, nbytes)) == -1) {
   1512 			doio_fprintf(stderr,
   1513 				     "read() request failed:  %s (%d)\n%s\n",
   1514 				     SYSERR, errno,
   1515 				     format_rw(req, fd, addr, -1, NULL, NULL));
   1516 			doio_upanic(U_RVAL);
   1517 			return -1;
   1518 		} else if (rval != nbytes) {
   1519 			doio_fprintf(stderr,
   1520 				     "read() request returned wrong # of bytes - expected %d, got %d\n%s\n",
   1521 				     nbytes, rval,
   1522 				     format_rw(req, fd, addr, -1, NULL, NULL));
   1523 			doio_upanic(U_RVAL);
   1524 			return -1;
   1525 		}
   1526 		break;
   1527 
   1528 #ifdef CRAY
   1529 	case READA:
   1530 		/*
   1531 		 * Async read
   1532 		 */
   1533 
   1534 		/* move to the desired file position. */
   1535 		if (lseek(fd, offset, SEEK_SET) == -1) {
   1536 			doio_fprintf(stderr,
   1537 				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
   1538 				     fd, offset, SYSERR, errno);
   1539 			return -1;
   1540 		}
   1541 
   1542 		aio_strat = req->r_data.read.r_aio_strat;
   1543 		signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
   1544 
   1545 		aio_id = aio_register(fd, aio_strat, signo);
   1546 		aiop = aio_slot(aio_id);
   1547 
   1548 		if (reada(fd, addr, nbytes, &aiop->iosw, signo) == -1) {
   1549 			doio_fprintf(stderr, "reada() failed: %s (%d)\n%s\n",
   1550 				     SYSERR, errno,
   1551 				     format_rw(req, fd, addr, signo, NULL,
   1552 					       &aiop->iosw));
   1553 			aio_unregister(aio_id);
   1554 			doio_upanic(U_RVAL);
   1555 			rval = -1;
   1556 		} else {
   1557 			/*
   1558 			 * Wait for io to complete
   1559 			 */
   1560 
   1561 			aio_wait(aio_id);
   1562 
   1563 			/*
   1564 			 * make sure the io completed without error
   1565 			 */
   1566 
   1567 			if (aiop->iosw.sw_count != nbytes) {
   1568 				doio_fprintf(stderr,
   1569 					     "Bad iosw from reada()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
   1570 					     1, 0, nbytes,
   1571 					     aiop->iosw.sw_flag,
   1572 					     aiop->iosw.sw_error,
   1573 					     aiop->iosw.sw_count,
   1574 					     format_rw(req, fd, addr, signo,
   1575 						       NULL, &aiop->iosw));
   1576 				aio_unregister(aio_id);
   1577 				doio_upanic(U_IOSW);
   1578 				rval = -1;
   1579 			} else {
   1580 				aio_unregister(aio_id);
   1581 				rval = 0;
   1582 			}
   1583 		}
   1584 
   1585 		if (rval == -1)
   1586 			return rval;
   1587 		break;
   1588 #endif /* CRAY */
   1589 	}
   1590 
   1591 	return 0;		/* if we get here, everything went ok */
   1592 }
   1593 
   1594 /*
   1595  * Perform the verious types of disk writes.
   1596  */
   1597 
   1598 int do_write(struct io_req *req)
   1599 {
   1600 	static int pid = -1;
   1601 	int fd, nbytes, oflags, signo;
   1602 	int logged_write, rval, got_lock;
   1603 	off_t offset, woffset;
   1604 	char *addr, pattern, *file, *msg;
   1605 	struct wlog_rec wrec;
   1606 #ifdef CRAY
   1607 	int aio_strat, aio_id;
   1608 	struct aio_info *aiop;
   1609 #endif
   1610 #ifdef sgi
   1611 	struct fd_cache *fdc;
   1612 #endif
   1613 
   1614 	woffset = 0;
   1615 
   1616 	/*
   1617 	 * Misc variable setup
   1618 	 */
   1619 
   1620 	signo = 0;
   1621 	nbytes = req->r_data.write.r_nbytes;
   1622 	offset = req->r_data.write.r_offset;
   1623 	pattern = req->r_data.write.r_pattern;
   1624 	file = req->r_data.write.r_file;
   1625 	oflags = req->r_data.write.r_oflags;
   1626 
   1627 	/*printf("pwrite: %s, %#o, %d %d\n", file, oflags, offset, nbytes); */
   1628 
   1629 	/*
   1630 	 * Allocate core memory and possibly sds space.  Initialize the data
   1631 	 * to be written.
   1632 	 */
   1633 
   1634 	Pattern[0] = pattern;
   1635 
   1636 	/*
   1637 	 * Get a descriptor to do the io on
   1638 	 */
   1639 
   1640 	if ((fd = alloc_fd(file, oflags)) == -1)
   1641 		return -1;
   1642 
   1643 	/*printf("write: %d, %s, %#o, %d %d\n",
   1644 	   fd, file, oflags, offset, nbytes); */
   1645 
   1646 	/*
   1647 	 * Allocate SDS space for backdoor write if desired
   1648 	 */
   1649 
   1650 #ifdef CRAY
   1651 	if (oflags & O_SSD) {
   1652 #ifndef _CRAYMPP
   1653 		if ((rval = alloc_mem(nbytes + wtob(1))) < 0) {
   1654 			return rval;
   1655 		}
   1656 
   1657 		(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
   1658 		/*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */
   1659 
   1660 		if (alloc_sds(nbytes) == -1)
   1661 			return -1;
   1662 
   1663 		if (sswrite((long)Memptr, Sdsptr, btoc(nbytes)) == -1) {
   1664 			doio_fprintf(stderr,
   1665 				     "sswrite(%d, %d, %d) failed:  %s (%d)\n",
   1666 				     (long)Memptr, Sdsptr, btoc(nbytes), SYSERR,
   1667 				     errno);
   1668 			fflush(stderr);
   1669 			return -1;
   1670 		}
   1671 
   1672 		addr = (char *)Sdsptr;
   1673 #else
   1674 		doio_fprintf(stderr,
   1675 			     "Invalid O_SSD flag was generated for MPP system\n");
   1676 		fflush(stderr);
   1677 		return -1;
   1678 #endif /* !CRAYMPP */
   1679 	} else {
   1680 		if ((rval = alloc_mem(nbytes + wtob(1)) < 0)) {
   1681 			return rval;
   1682 		}
   1683 
   1684 		addr = Memptr;
   1685 
   1686 		/*
   1687 		 * if io is not raw, bump the offset by a random amount
   1688 		 * to generate non-word-aligned io.
   1689 		 */
   1690 
   1691 		if (!(req->r_data.write.r_uflags & F_WORD_ALIGNED)) {
   1692 			addr += random_range(0, wtob(1) - 1, 1, NULL);
   1693 		}
   1694 
   1695 		(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
   1696 		if (addr != Memptr)
   1697 			memmove(addr, Memptr, nbytes);
   1698 	}
   1699 #else /* CRAY */
   1700 #ifdef sgi
   1701 	/* get memory alignment for using DIRECT I/O */
   1702 	fdc = alloc_fdcache(file, oflags);
   1703 
   1704 	if ((rval = alloc_mem(nbytes + wtob(1) * 2 + fdc->c_memalign)) < 0) {
   1705 		return rval;
   1706 	}
   1707 
   1708 	addr = Memptr;
   1709 
   1710 	if ((req->r_data.write.r_uflags & F_WORD_ALIGNED)) {
   1711 		/*
   1712 		 * Force memory alignment for Direct I/O
   1713 		 */
   1714 		if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
   1715 			addr +=
   1716 			    fdc->c_memalign - ((long)addr % fdc->c_memalign);
   1717 		}
   1718 	} else {
   1719 		addr += random_range(0, wtob(1) - 1, 1, NULL);
   1720 	}
   1721 
   1722 	(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
   1723 	if (addr != Memptr)
   1724 		memmove(addr, Memptr, nbytes);
   1725 
   1726 #else /* sgi */
   1727 	if ((rval = alloc_mem(nbytes + wtob(1) * 2)) < 0) {
   1728 		return rval;
   1729 	}
   1730 
   1731 	addr = Memptr;
   1732 
   1733 	(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
   1734 	if (addr != Memptr)
   1735 		memmove(addr, Memptr, nbytes);
   1736 #endif /* sgi */
   1737 #endif /* CRAY */
   1738 
   1739 	rval = -1;
   1740 	got_lock = 0;
   1741 	logged_write = 0;
   1742 
   1743 	if (k_opt) {
   1744 		if (lock_file_region(file, fd, F_WRLCK, offset, nbytes) < 0) {
   1745 			alloc_mem(-1);
   1746 			exit(E_INTERNAL);
   1747 		}
   1748 
   1749 		got_lock = 1;
   1750 	}
   1751 
   1752 	/*
   1753 	 * Write a preliminary write-log entry.  This is done so that
   1754 	 * doio_check can do corruption detection across an interrupt/crash.
   1755 	 * Note that w_done is set to 0.  If doio_check sees this, it
   1756 	 * re-creates the file extents as if the write completed, but does not
   1757 	 * do any checking - see comments in doio_check for more details.
   1758 	 */
   1759 
   1760 	if (w_opt) {
   1761 		if (pid == -1) {
   1762 			pid = getpid();
   1763 		}
   1764 		wrec.w_async = (req->r_type == WRITEA) ? 1 : 0;
   1765 		wrec.w_oflags = oflags;
   1766 		wrec.w_pid = pid;
   1767 		wrec.w_offset = offset;
   1768 		wrec.w_nbytes = nbytes;
   1769 
   1770 		wrec.w_pathlen = strlen(file);
   1771 		memcpy(wrec.w_path, file, wrec.w_pathlen);
   1772 		wrec.w_hostlen = strlen(Host);
   1773 		memcpy(wrec.w_host, Host, wrec.w_hostlen);
   1774 		wrec.w_patternlen = Pattern_Length;
   1775 		memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen);
   1776 
   1777 		wrec.w_done = 0;
   1778 
   1779 		if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) {
   1780 			doio_fprintf(stderr,
   1781 				     "Could not append to write-log:  %s (%d)\n",
   1782 				     SYSERR, errno);
   1783 		} else {
   1784 			logged_write = 1;
   1785 		}
   1786 	}
   1787 
   1788 	switch (req->r_type) {
   1789 	case WRITE:
   1790 		/*
   1791 		 * sync write
   1792 		 */
   1793 
   1794 		if (lseek(fd, offset, SEEK_SET) == -1) {
   1795 			doio_fprintf(stderr,
   1796 				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
   1797 				     fd, offset, SYSERR, errno);
   1798 			return -1;
   1799 		}
   1800 
   1801 		rval = write(fd, addr, nbytes);
   1802 
   1803 		if (rval == -1) {
   1804 			doio_fprintf(stderr,
   1805 				     "write() failed:  %s (%d)\n%s\n",
   1806 				     SYSERR, errno,
   1807 				     format_rw(req, fd, addr, -1, Pattern,
   1808 					       NULL));
   1809 #ifdef sgi
   1810 			doio_fprintf(stderr,
   1811 				     "write() failed:  %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%miniou(%d)=%d, oflags=%#o memalign=%d, addr%%memalign=%d\n",
   1812 				     strerror(errno),
   1813 				     fd, addr, nbytes,
   1814 				     offset,
   1815 				     fdc->c_miniosz, nbytes % fdc->c_miniosz,
   1816 				     oflags, fdc->c_memalign,
   1817 				     (long)addr % fdc->c_memalign);
   1818 #else
   1819 			doio_fprintf(stderr,
   1820 				     "write() failed:  %s\n\twrite(%d, %#o, %d)\n\toffset %d, nbytes%%1B=%d, oflags=%#o\n",
   1821 				     strerror(errno),
   1822 				     fd, addr, nbytes,
   1823 				     offset, nbytes % 4096, oflags);
   1824 #endif
   1825 			doio_upanic(U_RVAL);
   1826 		} else if (rval != nbytes) {
   1827 			doio_fprintf(stderr,
   1828 				     "write() returned wrong # bytes - expected %d, got %d\n%s\n",
   1829 				     nbytes, rval,
   1830 				     format_rw(req, fd, addr, -1, Pattern,
   1831 					       NULL));
   1832 			doio_upanic(U_RVAL);
   1833 			rval = -1;
   1834 		}
   1835 
   1836 		break;
   1837 
   1838 #ifdef CRAY
   1839 	case WRITEA:
   1840 		/*
   1841 		 * async write
   1842 		 */
   1843 		if (lseek(fd, offset, SEEK_SET) == -1) {
   1844 			doio_fprintf(stderr,
   1845 				     "lseek(%d, %d, SEEK_SET) failed:  %s (%d)\n",
   1846 				     fd, offset, SYSERR, errno);
   1847 			return -1;
   1848 		}
   1849 
   1850 		aio_strat = req->r_data.write.r_aio_strat;
   1851 		signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
   1852 
   1853 		aio_id = aio_register(fd, aio_strat, signo);
   1854 		aiop = aio_slot(aio_id);
   1855 
   1856 		/*
   1857 		 * init iosw and do the async write
   1858 		 */
   1859 
   1860 		if (writea(fd, addr, nbytes, &aiop->iosw, signo) == -1) {
   1861 			doio_fprintf(stderr,
   1862 				     "writea() failed: %s (%d)\n%s\n",
   1863 				     SYSERR, errno,
   1864 				     format_rw(req, fd, addr, -1, Pattern,
   1865 					       NULL));
   1866 			doio_upanic(U_RVAL);
   1867 			aio_unregister(aio_id);
   1868 			rval = -1;
   1869 		} else {
   1870 
   1871 			/*
   1872 			 * Wait for io to complete
   1873 			 */
   1874 
   1875 			aio_wait(aio_id);
   1876 
   1877 			/*
   1878 			 * check that iosw is ok
   1879 			 */
   1880 
   1881 			if (aiop->iosw.sw_count != nbytes) {
   1882 				doio_fprintf(stderr,
   1883 					     "Bad iosw from writea()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
   1884 					     1, 0, nbytes,
   1885 					     aiop->iosw.sw_flag,
   1886 					     aiop->iosw.sw_error,
   1887 					     aiop->iosw.sw_count,
   1888 					     format_rw(req, fd, addr, -1,
   1889 						       Pattern, &aiop->iosw));
   1890 				aio_unregister(aio_id);
   1891 				doio_upanic(U_IOSW);
   1892 				rval = -1;
   1893 			} else {
   1894 				aio_unregister(aio_id);
   1895 				rval = 0;
   1896 			}
   1897 		}
   1898 		break;
   1899 
   1900 #endif /* CRAY */
   1901 	}
   1902 
   1903 	/*
   1904 	 * Verify that the data was written correctly - check_file() returns
   1905 	 * a non-null pointer which contains an error message if there are
   1906 	 * problems.
   1907 	 */
   1908 
   1909 	if (v_opt) {
   1910 		msg = check_file(file, offset, nbytes, Pattern, Pattern_Length,
   1911 				 0, oflags & O_PARALLEL);
   1912 		if (msg != NULL) {
   1913 			doio_fprintf(stderr, "%s%s\n", msg,
   1914 #ifdef CRAY
   1915 				     format_rw(req, fd, addr, -1, Pattern,
   1916 					       &aiop->iosw)
   1917 #else
   1918 				     format_rw(req, fd, addr, -1, Pattern, NULL)
   1919 #endif
   1920 			    );
   1921 			doio_upanic(U_CORRUPTION);
   1922 			exit(E_COMPARE);
   1923 
   1924 		}
   1925 	}
   1926 
   1927 	/*
   1928 	 * General cleanup ...
   1929 	 *
   1930 	 * Write extent information to the write-log, so that doio_check can do
   1931 	 * corruption detection.  Note that w_done is set to 1, indicating that
   1932 	 * the write has been verified as complete.  We don't need to write the
   1933 	 * filename on the second logging.
   1934 	 */
   1935 
   1936 	if (w_opt && logged_write) {
   1937 		wrec.w_done = 1;
   1938 		wlog_record_write(&Wlog, &wrec, woffset);
   1939 	}
   1940 
   1941 	/*
   1942 	 * Unlock file region if necessary
   1943 	 */
   1944 
   1945 	if (got_lock) {
   1946 		if (lock_file_region(file, fd, F_UNLCK, offset, nbytes) < 0) {
   1947 			alloc_mem(-1);
   1948 			exit(E_INTERNAL);
   1949 		}
   1950 	}
   1951 
   1952 	return ((rval == -1) ? -1 : 0);
   1953 }
   1954 
   1955 /*
   1956  * Simple routine to lock/unlock a file using fcntl()
   1957  */
   1958 
   1959 int lock_file_region(char *fname, int fd, int type, int start, int nbytes)
   1960 {
   1961 	struct flock flk;
   1962 
   1963 	flk.l_type = type;
   1964 	flk.l_whence = 0;
   1965 	flk.l_start = start;
   1966 	flk.l_len = nbytes;
   1967 
   1968 	if (fcntl(fd, F_SETLKW, &flk) < 0) {
   1969 		doio_fprintf(stderr,
   1970 			     "fcntl(%d, %d, %#o) failed for file %s, lock type %d, offset %d, length %d:  %s (%d), open flags: %#o\n",
   1971 			     fd, F_SETLKW, &flk, fname, type,
   1972 			     start, nbytes, SYSERR, errno,
   1973 			     fcntl(fd, F_GETFL, 0));
   1974 		return -1;
   1975 	}
   1976 
   1977 	return 0;
   1978 }
   1979 
   1980 /*
   1981  * Perform a listio request.
   1982  */
   1983 
   1984 #ifdef CRAY
   1985 char *format_listio(struct io_req *ioreq, int lcmd, struct listreq *list,
   1986 		    int nent, int fd, char *pattern)
   1987 {
   1988 	static char *errbuf = NULL;
   1989 	struct listio_req *liop = &ioreq->r_data.listio;
   1990 	struct listreq *listreq;
   1991 	char *cp, *cmd, *opcode, *aio_strat;
   1992 	int i;
   1993 
   1994 	switch (lcmd) {
   1995 	case LC_START:
   1996 		cmd = "LC_START";
   1997 		break;
   1998 	case LC_WAIT:
   1999 		cmd = "LC_WAIT";
   2000 		break;
   2001 	default:
   2002 		cmd = "???";
   2003 		break;
   2004 	}
   2005 
   2006 	if (errbuf == NULL)
   2007 		errbuf = malloc(32768);
   2008 
   2009 	cp = errbuf;
   2010 	cp += sprintf(cp, "Request number %d\n", Reqno);
   2011 
   2012 	cp += sprintf(cp, "syscall:  listio(%s, %#o, %d)\n\n", cmd, list, nent);
   2013 
   2014 	aio_strat = format_strat(liop->r_aio_strat);
   2015 
   2016 	for (i = 0; i < nent; i++) {
   2017 		cp += sprintf(cp, "struct lioreq for request element %d\n", i);
   2018 		cp += sprintf(cp, "----------------------------------------\n");
   2019 
   2020 		listreq = list + i;
   2021 
   2022 		switch (listreq->li_opcode) {
   2023 		case LO_READ:
   2024 			opcode = "LO_READ";
   2025 			break;
   2026 		case LO_WRITE:
   2027 			opcode = "LO_WRITE";
   2028 			break;
   2029 		default:
   2030 			opcode = "???";
   2031 			break;
   2032 		}
   2033 
   2034 		cp += sprintf(cp, "          li_opcode =    %s\n", opcode);
   2035 		cp +=
   2036 		    sprintf(cp, "          li_drvr =      %#o\n",
   2037 			    listreq->li_drvr);
   2038 		cp +=
   2039 		    sprintf(cp, "          li_flags =     %#o\n",
   2040 			    listreq->li_flags);
   2041 		cp +=
   2042 		    sprintf(cp, "          li_offset =    %d\n",
   2043 			    listreq->li_offset);
   2044 		cp +=
   2045 		    sprintf(cp, "          li_fildes =    %d\n",
   2046 			    listreq->li_fildes);
   2047 		cp +=
   2048 		    sprintf(cp, "          li_buf =       %#o\n",
   2049 			    listreq->li_buf);
   2050 		cp +=
   2051 		    sprintf(cp, "          li_nbyte =     %d\n",
   2052 			    listreq->li_nbyte);
   2053 		cp +=
   2054 		    sprintf(cp, "          li_status =    %#o (%d, %d, %d)\n",
   2055 			    listreq->li_status, listreq->li_status->sw_flag,
   2056 			    listreq->li_status->sw_error,
   2057 			    listreq->li_status->sw_count);
   2058 		cp +=
   2059 		    sprintf(cp, "          li_signo =     %d\n",
   2060 			    listreq->li_signo);
   2061 		cp +=
   2062 		    sprintf(cp, "          li_nstride =   %d\n",
   2063 			    listreq->li_nstride);
   2064 		cp +=
   2065 		    sprintf(cp, "          li_filstride = %d\n",
   2066 			    listreq->li_filstride);
   2067 		cp +=
   2068 		    sprintf(cp, "          li_memstride = %d\n",
   2069 			    listreq->li_memstride);
   2070 		cp +=
   2071 		    sprintf(cp, "          io completion strategy is %s\n",
   2072 			    aio_strat);
   2073 	}
   2074 	return errbuf;
   2075 }
   2076 #endif /* CRAY */
   2077 
   2078 int do_listio(struct io_req *req)
   2079 {
   2080 #ifdef CRAY
   2081 	struct listio_req *lio;
   2082 	int fd, oflags, signo, nb, i;
   2083 	int logged_write, rval, got_lock;
   2084 	int aio_strat, aio_id;
   2085 	int min_byte, max_byte;
   2086 	int mem_needed;
   2087 	int foffset, fstride, mstride, nstrides;
   2088 	char *moffset;
   2089 	long offset, woffset;
   2090 	char *addr, *msg;
   2091 	sigset_t block_mask, omask;
   2092 	struct wlog_rec wrec;
   2093 	struct aio_info *aiop;
   2094 	struct listreq lio_req;
   2095 
   2096 	lio = &req->r_data.listio;
   2097 
   2098 	/*
   2099 	 * If bytes per stride is less than the stride size, drop the request
   2100 	 * since it will cause overlapping strides, and we cannot predict
   2101 	 * the order they will complete in.
   2102 	 */
   2103 
   2104 	if (lio->r_filestride && abs(lio->r_filestride) < lio->r_nbytes) {
   2105 		doio_fprintf(stderr,
   2106 			     "do_listio():  Bogus listio request - abs(filestride) [%d] < nbytes [%d]\n",
   2107 			     abs(lio->r_filestride), lio->r_nbytes);
   2108 		return -1;
   2109 	}
   2110 
   2111 	/*
   2112 	 * Allocate core memory.  Initialize the data to be written.  Make
   2113 	 * sure we get enough, based on the memstride.
   2114 	 */
   2115 
   2116 	mem_needed =
   2117 	    stride_bounds(0, lio->r_memstride, lio->r_nstrides,
   2118 			  lio->r_nbytes, NULL, NULL);
   2119 
   2120 	if ((rval = alloc_mem(mem_needed + wtob(1))) < 0) {
   2121 		return rval;
   2122 	}
   2123 
   2124 	/*
   2125 	 * Set the memory address pointer.  If the io is not raw, adjust
   2126 	 * addr by a random amount, so that non-raw io is not necessarily
   2127 	 * word aligned.
   2128 	 */
   2129 
   2130 	addr = Memptr;
   2131 
   2132 	if (!(lio->r_uflags & F_WORD_ALIGNED)) {
   2133 		addr += random_range(0, wtob(1) - 1, 1, NULL);
   2134 	}
   2135 
   2136 	if (lio->r_opcode == LO_WRITE) {
   2137 		Pattern[0] = lio->r_pattern;
   2138 		(*Data_Fill) (Memptr, mem_needed, Pattern, Pattern_Length, 0);
   2139 		if (addr != Memptr)
   2140 			memmove(addr, Memptr, mem_needed);
   2141 	}
   2142 
   2143 	/*
   2144 	 * Get a descriptor to do the io on.  No need to do an lseek, as this
   2145 	 * is encoded in the listio request.
   2146 	 */
   2147 
   2148 	if ((fd = alloc_fd(lio->r_file, lio->r_oflags)) == -1) {
   2149 		return -1;
   2150 	}
   2151 
   2152 	rval = -1;
   2153 	got_lock = 0;
   2154 	logged_write = 0;
   2155 
   2156 	/*
   2157 	 * If the opcode is LO_WRITE, lock all regions of the file that
   2158 	 * are touched by this listio request.  Currently, we use
   2159 	 * stride_bounds() to figure out the min and max bytes affected, and
   2160 	 * lock the entire region, regardless of the file stride.
   2161 	 */
   2162 
   2163 	if (lio->r_opcode == LO_WRITE && k_opt) {
   2164 		stride_bounds(lio->r_offset,
   2165 			      lio->r_filestride, lio->r_nstrides,
   2166 			      lio->r_nbytes, &min_byte, &max_byte);
   2167 
   2168 		if (lock_file_region(lio->r_file, fd, F_WRLCK,
   2169 				     min_byte, (max_byte - min_byte + 1)) < 0) {
   2170 			doio_fprintf(stderr,
   2171 				     "stride_bounds(%d, %d, %d, %d, ..., ...) set min_byte to %d, max_byte to %d\n",
   2172 				     lio->r_offset, lio->r_filestride,
   2173 				     lio->r_nstrides, lio->r_nbytes, min_byte,
   2174 				     max_byte);
   2175 			return -1;
   2176 		} else {
   2177 			got_lock = 1;
   2178 		}
   2179 	}
   2180 
   2181 	/*
   2182 	 * async write
   2183 	 */
   2184 
   2185 	aio_strat = lio->r_aio_strat;
   2186 	signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
   2187 
   2188 	aio_id = aio_register(fd, aio_strat, signo);
   2189 	aiop = aio_slot(aio_id);
   2190 
   2191 	/*
   2192 	 * Form the listio request, and make the call.
   2193 	 */
   2194 
   2195 	lio_req.li_opcode = lio->r_opcode;
   2196 	lio_req.li_drvr = 0;
   2197 	lio_req.li_flags = LF_LSEEK;
   2198 	lio_req.li_offset = lio->r_offset;
   2199 	lio_req.li_fildes = fd;
   2200 
   2201 	if (lio->r_memstride >= 0 || lio->r_nstrides <= 1) {
   2202 		lio_req.li_buf = addr;
   2203 	} else {
   2204 		lio_req.li_buf = addr + mem_needed - lio->r_nbytes;
   2205 	}
   2206 
   2207 	lio_req.li_nbyte = lio->r_nbytes;
   2208 	lio_req.li_status = &aiop->iosw;
   2209 	lio_req.li_signo = signo;
   2210 	lio_req.li_nstride = lio->r_nstrides;
   2211 	lio_req.li_filstride = lio->r_filestride;
   2212 	lio_req.li_memstride = lio->r_memstride;
   2213 
   2214 	/*
   2215 	 * If signo != 0, block signo while we're in the system call, so that
   2216 	 * we don't get interrupted syscall failures.
   2217 	 */
   2218 
   2219 	if (signo) {
   2220 		sigemptyset(&block_mask);
   2221 		sigaddset(&block_mask, signo);
   2222 		sigprocmask(SIG_BLOCK, &block_mask, &omask);
   2223 	}
   2224 
   2225 	if (listio(lio->r_cmd, &lio_req, 1) < 0) {
   2226 		doio_fprintf(stderr,
   2227 			     "listio() failed: %s (%d)\n%s\n",
   2228 			     SYSERR, errno,
   2229 			     format_listio(req, lio->r_cmd, &lio_req, 1, fd,
   2230 					   Pattern));
   2231 		aio_unregister(aio_id);
   2232 		doio_upanic(U_RVAL);
   2233 		goto lio_done;
   2234 	}
   2235 
   2236 	if (signo) {
   2237 		sigprocmask(SIG_SETMASK, &omask, NULL);
   2238 	}
   2239 
   2240 	/*
   2241 	 * Wait for io to complete
   2242 	 */
   2243 
   2244 	aio_wait(aio_id);
   2245 
   2246 	nstrides = lio->r_nstrides ? lio->r_nstrides : 1;
   2247 	if (aiop->iosw.sw_count != lio->r_nbytes * nstrides) {
   2248 		doio_fprintf(stderr,
   2249 			     "Bad iosw from listio()\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n",
   2250 			     1, 0, lio->r_nbytes * lio->r_nstrides,
   2251 			     aiop->iosw.sw_flag,
   2252 			     aiop->iosw.sw_error, aiop->iosw.sw_count,
   2253 			     format_listio(req, lio->r_cmd, &lio_req, 1, fd,
   2254 					   Pattern));
   2255 		aio_unregister(aio_id);
   2256 		doio_upanic(U_IOSW);
   2257 		goto lio_done;
   2258 	}
   2259 
   2260 	aio_unregister(aio_id);
   2261 
   2262 	/*
   2263 	 * Verify that the data was written correctly - check_file() returns
   2264 	 * a non-null pointer which contains an error message if there are
   2265 	 * problems.
   2266 	 *
   2267 	 * For listio, we basically have to make 1 call to check_file for each
   2268 	 * stride.
   2269 	 */
   2270 
   2271 	if (v_opt && lio_req.li_opcode == LO_WRITE) {
   2272 		fstride = lio->r_filestride ? lio->r_filestride : lio->r_nbytes;
   2273 		mstride = lio->r_memstride ? lio->r_memstride : lio->r_nbytes;
   2274 		foffset = lio->r_offset;
   2275 
   2276 		if (mstride > 0 || lio->r_nstrides <= 1) {
   2277 			moffset = addr;
   2278 		} else {
   2279 			moffset = addr + mem_needed - lio->r_nbytes;
   2280 		}
   2281 
   2282 		for (i = 0; i < lio_req.li_nstride; i++) {
   2283 			msg = check_file(lio->r_file,
   2284 					 foffset, lio->r_nbytes,
   2285 					 Pattern, Pattern_Length,
   2286 					 moffset - addr,
   2287 					 lio->r_oflags & O_PARALLEL);
   2288 
   2289 			if (msg != NULL) {
   2290 				doio_fprintf(stderr, "%s\n%s\n",
   2291 					     msg,
   2292 					     format_listio(req, lio->r_cmd,
   2293 							   &lio_req, 1, fd,
   2294 							   Pattern));
   2295 				doio_upanic(U_CORRUPTION);
   2296 				exit(E_COMPARE);
   2297 			}
   2298 
   2299 			moffset += mstride;
   2300 			foffset += fstride;
   2301 		}
   2302 
   2303 	}
   2304 
   2305 	rval = 0;
   2306 
   2307 lio_done:
   2308 
   2309 	/*
   2310 	 * General cleanup ...
   2311 	 *
   2312 	 */
   2313 
   2314 	/*
   2315 	 * Release file locks if necessary
   2316 	 */
   2317 
   2318 	if (got_lock) {
   2319 		if (lock_file_region(lio->r_file, fd, F_UNLCK,
   2320 				     min_byte, (max_byte - min_byte + 1)) < 0) {
   2321 			return -1;
   2322 		}
   2323 	}
   2324 
   2325 	return rval;
   2326 #else
   2327 	return -1;
   2328 #endif
   2329 }
   2330 
   2331 /*
   2332  * perform ssread/sswrite operations
   2333  */
   2334 
   2335 #ifdef _CRAY1
   2336 
   2337 int do_ssdio(struct io_req *req)
   2338 {
   2339 	int nbytes, nb;
   2340 	char errbuf[BSIZE];
   2341 
   2342 	nbytes = req->r_data.ssread.r_nbytes;
   2343 
   2344 	/*
   2345 	 * Grab core and sds space
   2346 	 */
   2347 
   2348 	if ((nb = alloc_mem(nbytes)) < 0)
   2349 		return nb;
   2350 
   2351 	if (alloc_sds(nbytes) == -1)
   2352 		return -1;
   2353 
   2354 	if (req->r_type == SSWRITE) {
   2355 
   2356 		/*
   2357 		 * Init data and ship it to the ssd
   2358 		 */
   2359 
   2360 		Pattern[0] = req->r_data.sswrite.r_pattern;
   2361 		/*pattern_fill(Memptr, nbytes, Pattern, Pattern_Length, 0); */
   2362 		(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length, 0);
   2363 
   2364 		if (sswrite((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) {
   2365 			doio_fprintf(stderr, "sswrite() failed:  %s (%d)\n%s\n",
   2366 				     SYSERR, errno,
   2367 				     format_sds(req, Memptr, Sdsptr, Pattern));
   2368 			doio_upanic(U_RVAL);
   2369 			return -1;
   2370 		}
   2371 	} else {
   2372 		/*
   2373 		 * read from sds
   2374 		 */
   2375 
   2376 		if (ssread((long)Memptr, (long)Sdsptr, btoc(nbytes)) == -1) {
   2377 			doio_fprintf(stderr, "ssread() failed: %s (%d)\n%s\n",
   2378 				     SYSERR, errno,
   2379 				     format_sds(req, Memptr, Sdsptr, Pattern));
   2380 
   2381 			doio_upanic(U_RVAL);
   2382 			return -1;
   2383 		}
   2384 	}
   2385 
   2386 	/*
   2387 	 * Verify data if SSWRITE and v_opt
   2388 	 */
   2389 
   2390 	if (v_opt && req->r_type == SSWRITE) {
   2391 		ssread((long)Memptr, (long)Sdsptr, btoc(nbytes));
   2392 
   2393 		if (pattern_check(Memptr, nbytes, Pattern, Pattern_Length, 0) ==
   2394 		    -1) {
   2395 			doio_fprintf(stderr,
   2396 				     "sds DATA COMPARE ERROR - ABORTING\n%s\n",
   2397 				     format_sds(req, Memptr, Sdsptr, Pattern));
   2398 
   2399 			doio_upanic(U_CORRUPTION);
   2400 			exit(E_COMPARE);
   2401 		}
   2402 	}
   2403 }
   2404 
   2405 #else
   2406 
   2407 #ifdef CRAY
   2408 
   2409 int do_ssdio(struct io_req *req)
   2410 {
   2411 	doio_fprintf(stderr,
   2412 		     "Internal Error - do_ssdio() called on a non-cray1 system\n");
   2413 	alloc_mem(-1);
   2414 	exit(E_INTERNAL);
   2415 }
   2416 
   2417 #endif /* CRAY */
   2418 
   2419 #endif /* _CRAY1 */
   2420 
   2421 char *fmt_ioreq(struct io_req *ioreq, struct syscall_info *sy, int fd)
   2422 {
   2423 	static char *errbuf = NULL;
   2424 	char *cp;
   2425 	struct rw_req *io;
   2426 	struct smap *aname;
   2427 #ifdef CRAY
   2428 	struct stat sbuf;
   2429 #endif
   2430 #ifdef sgi
   2431 	struct dioattr finfo;
   2432 #endif
   2433 
   2434 	if (errbuf == NULL)
   2435 		errbuf = malloc(32768);
   2436 
   2437 	io = &ioreq->r_data.io;
   2438 
   2439 	/*
   2440 	 * Look up async I/O completion strategy
   2441 	 */
   2442 	for (aname = aionames;
   2443 	     aname->value != -1 && aname->value != io->r_aio_strat; aname++) ;
   2444 
   2445 	cp = errbuf;
   2446 	cp += sprintf(cp, "Request number %d\n", Reqno);
   2447 
   2448 	cp +=
   2449 	    sprintf(cp, "          fd %d is file %s - open flags are %#o %s\n",
   2450 		    fd, io->r_file, io->r_oflags, format_oflags(io->r_oflags));
   2451 
   2452 	if (sy->sy_flags & SY_WRITE) {
   2453 		cp +=
   2454 		    sprintf(cp,
   2455 			    "          write done at file offset %d - pattern is %c (%#o)\n",
   2456 			    io->r_offset,
   2457 			    (io->r_pattern == '\0') ? '?' : io->r_pattern,
   2458 			    io->r_pattern);
   2459 	} else {
   2460 		cp += sprintf(cp, "          read done at file offset %d\n",
   2461 			      io->r_offset);
   2462 	}
   2463 
   2464 	if (sy->sy_flags & SY_ASYNC) {
   2465 		cp +=
   2466 		    sprintf(cp,
   2467 			    "          async io completion strategy is %s\n",
   2468 			    aname->string);
   2469 	}
   2470 
   2471 	cp +=
   2472 	    sprintf(cp,
   2473 		    "          number of requests is %d, strides per request is %d\n",
   2474 		    io->r_nent, io->r_nstrides);
   2475 
   2476 	cp += sprintf(cp, "          i/o byte count = %d\n", io->r_nbytes);
   2477 
   2478 	cp += sprintf(cp, "          memory alignment is %s\n",
   2479 		      (io->
   2480 		       r_uflags & F_WORD_ALIGNED) ? "aligned" : "unaligned");
   2481 
   2482 #ifdef CRAY
   2483 	if (io->r_oflags & O_RAW) {
   2484 		cp +=
   2485 		    sprintf(cp,
   2486 			    "          RAW I/O: offset %% 4096 = %d length %% 4096 = %d\n",
   2487 			    io->r_offset % 4096, io->r_nbytes % 4096);
   2488 		fstat(fd, &sbuf);
   2489 		cp +=
   2490 		    sprintf(cp,
   2491 			    "          optimal file xfer size: small: %d large: %d\n",
   2492 			    sbuf.st_blksize, sbuf.st_oblksize);
   2493 		cp +=
   2494 		    sprintf(cp, "          cblks %d cbits %#o\n", sbuf.st_cblks,
   2495 			    sbuf.st_cbits);
   2496 	}
   2497 #endif
   2498 #ifdef sgi
   2499 	if (io->r_oflags & O_DIRECT) {
   2500 
   2501 		if (fcntl(fd, F_DIOINFO, &finfo) == -1) {
   2502 			cp +=
   2503 			    sprintf(cp,
   2504 				    "          Error %s (%d) getting direct I/O info\n",
   2505 				    strerror(errno), errno);
   2506 			finfo.d_mem = 1;
   2507 			finfo.d_miniosz = 1;
   2508 			finfo.d_maxiosz = 1;
   2509 		}
   2510 
   2511 		cp +=
   2512 		    sprintf(cp,
   2513 			    "          DIRECT I/O: offset %% %d = %d length %% %d = %d\n",
   2514 			    finfo.d_miniosz, io->r_offset % finfo.d_miniosz,
   2515 			    io->r_nbytes, io->r_nbytes % finfo.d_miniosz);
   2516 		cp +=
   2517 		    sprintf(cp,
   2518 			    "          mem alignment 0x%x xfer size: small: %d large: %d\n",
   2519 			    finfo.d_mem, finfo.d_miniosz, finfo.d_maxiosz);
   2520 	}
   2521 #endif
   2522 
   2523 	return (errbuf);
   2524 }
   2525 
   2526 /*
   2527  * Issue listio requests
   2528  */
   2529 #ifdef CRAY
   2530 struct status *sy_listio(struct io_req *req, struct syscall_info *sysc, int fd,
   2531 			 char *addr)
   2532 {
   2533 	int offset, nbytes, nstrides, nents, aio_strat;
   2534 	int aio_id, signo, o, i, lc;
   2535 	char *a;
   2536 	struct listreq *lio_req, *l;
   2537 	struct aio_info *aiop;
   2538 	struct status *status;
   2539 
   2540 	/*
   2541 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
   2542 	 * r_nbytes are at the same offset in the read_req and reada_req
   2543 	 * structures.
   2544 	 */
   2545 	offset = req->r_data.io.r_offset;
   2546 	nbytes = req->r_data.io.r_nbytes;
   2547 	nstrides = req->r_data.io.r_nstrides;
   2548 	nents = req->r_data.io.r_nent;
   2549 	aio_strat = req->r_data.io.r_aio_strat;
   2550 
   2551 	lc = (sysc->sy_flags & SY_ASYNC) ? LC_START : LC_WAIT;
   2552 
   2553 	status = malloc(sizeof(struct status));
   2554 	if (status == NULL) {
   2555 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2556 			     __FILE__, __LINE__);
   2557 		return NULL;
   2558 	}
   2559 	status->aioid = malloc((nents + 1) * sizeof(int));
   2560 	if (status->aioid == NULL) {
   2561 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2562 			     __FILE__, __LINE__);
   2563 		return NULL;
   2564 	}
   2565 
   2566 	signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
   2567 
   2568 	lio_req = malloc(nents * sizeof(struct listreq));
   2569 	if (lio_req == NULL) {
   2570 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2571 			     __FILE__, __LINE__);
   2572 		return NULL;
   2573 	}
   2574 	for (l = lio_req, a = addr, o = offset, i = 0;
   2575 	     i < nents; l++, a += nbytes, o += nbytes, i++) {
   2576 
   2577 		aio_id = aio_register(fd, aio_strat, signo);
   2578 		aiop = aio_slot(aio_id);
   2579 		status->aioid[i] = aio_id;
   2580 
   2581 		l->li_opcode = (sysc->sy_flags & SY_WRITE) ? LO_WRITE : LO_READ;
   2582 		l->li_offset = o;
   2583 		l->li_fildes = fd;
   2584 		l->li_buf = a;
   2585 		l->li_nbyte = nbytes;
   2586 		l->li_status = &aiop->iosw;
   2587 		l->li_signo = signo;
   2588 		l->li_nstride = nstrides;
   2589 		l->li_filstride = 0;
   2590 		l->li_memstride = 0;
   2591 		l->li_drvr = 0;
   2592 		l->li_flags = LF_LSEEK;
   2593 	}
   2594 
   2595 	status->aioid[nents] = -1;	/* end sentinel */
   2596 
   2597 	if ((status->rval = listio(lc, lio_req, nents)) == -1) {
   2598 		status->err = errno;
   2599 	}
   2600 
   2601 	free(lio_req);
   2602 	return (status);
   2603 }
   2604 
   2605 /*
   2606  * Calculate the size of a request in bytes and min/max boundaries
   2607  *
   2608  * This assumes filestride & memstride = 0.
   2609  */
   2610 int listio_mem(struct io_req *req, int offset, int fmstride, int *min, int *max)
   2611 {
   2612 	int i, size;
   2613 
   2614 	size = stride_bounds(offset, fmstride,
   2615 			     req->r_data.io.r_nstrides * req->r_data.io.r_nent,
   2616 			     req->r_data.io.r_nbytes, min, max);
   2617 	return (size);
   2618 }
   2619 
   2620 char *fmt_listio(struct io_req *req, struct syscall_info *sy, int fd,
   2621 		 char *addr)
   2622 {
   2623 	static char *errbuf = NULL;
   2624 	char *cp;
   2625 	char *c, *opcode;
   2626 	int i;
   2627 
   2628 	if (errbuf == NULL) {
   2629 		errbuf = malloc(32768);
   2630 		if (errbuf == NULL) {
   2631 			doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2632 				     __FILE__, __LINE__);
   2633 			return NULL;
   2634 		}
   2635 	}
   2636 
   2637 	c = (sy->sy_flags & SY_ASYNC) ? "lc_wait" : "lc_start";
   2638 
   2639 	cp = errbuf;
   2640 	cp += sprintf(cp, "syscall:  listio(%s, (?), %d)\n",
   2641 		      c, req->r_data.io.r_nent);
   2642 
   2643 	cp += sprintf(cp, "          data buffer at %#o\n", addr);
   2644 
   2645 	return (errbuf);
   2646 }
   2647 #endif /* CRAY */
   2648 
   2649 #ifdef sgi
   2650 struct status *sy_pread(struct io_req *req, struct syscall_info *sysc, int fd,
   2651 			char *addr)
   2652 {
   2653 	int rc;
   2654 	struct status *status;
   2655 
   2656 	rc = pread(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset);
   2657 
   2658 	status = malloc(sizeof(struct status));
   2659 	if (status == NULL) {
   2660 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2661 			     __FILE__, __LINE__);
   2662 		return NULL;
   2663 	}
   2664 	status->aioid = NULL;
   2665 	status->rval = rc;
   2666 	status->err = errno;
   2667 
   2668 	return (status);
   2669 }
   2670 
   2671 struct status *sy_pwrite(struct io_req *req, struct syscall_info *sysc, int fd,
   2672 			 char *addr)
   2673 {
   2674 	int rc;
   2675 	struct status *status;
   2676 
   2677 	rc = pwrite(fd, addr, req->r_data.io.r_nbytes, req->r_data.io.r_offset);
   2678 
   2679 	status = malloc(sizeof(struct status));
   2680 	if (status == NULL) {
   2681 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2682 			     __FILE__, __LINE__);
   2683 		return NULL;
   2684 	}
   2685 	status->aioid = NULL;
   2686 	status->rval = rc;
   2687 	status->err = errno;
   2688 
   2689 	return (status);
   2690 }
   2691 
   2692 char *fmt_pread(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
   2693 {
   2694 	static char *errbuf = NULL;
   2695 	char *cp;
   2696 
   2697 	if (errbuf == NULL) {
   2698 		errbuf = malloc(32768);
   2699 		if (errbuf == NULL) {
   2700 			doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2701 				     __FILE__, __LINE__);
   2702 			return NULL;
   2703 		}
   2704 	}
   2705 
   2706 	cp = errbuf;
   2707 	cp += sprintf(cp, "syscall:  %s(%d, 0x%lx, %d)\n",
   2708 		      sy->sy_name, fd, addr, req->r_data.io.r_nbytes);
   2709 	return (errbuf);
   2710 }
   2711 #endif /* sgi */
   2712 
   2713 #ifndef CRAY
   2714 struct status *sy_readv(struct io_req *req, struct syscall_info *sysc, int fd,
   2715 			char *addr)
   2716 {
   2717 	struct status *sy_rwv();
   2718 	return sy_rwv(req, sysc, fd, addr, 0);
   2719 }
   2720 
   2721 struct status *sy_writev(struct io_req *req, struct syscall_info *sysc, int fd,
   2722 			 char *addr)
   2723 {
   2724 	struct status *sy_rwv();
   2725 	return sy_rwv(req, sysc, fd, addr, 1);
   2726 }
   2727 
   2728 struct status *sy_rwv(struct io_req *req, struct syscall_info *sysc, int fd,
   2729 		      char *addr, int rw)
   2730 {
   2731 	int rc;
   2732 	struct status *status;
   2733 	struct iovec iov[2];
   2734 
   2735 	status = malloc(sizeof(struct status));
   2736 	if (status == NULL) {
   2737 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2738 			     __FILE__, __LINE__);
   2739 		return NULL;
   2740 	}
   2741 	status->aioid = NULL;
   2742 
   2743 	/* move to the desired file position. */
   2744 	if ((rc = lseek(fd, req->r_data.io.r_offset, SEEK_SET)) == -1) {
   2745 		status->rval = rc;
   2746 		status->err = errno;
   2747 		return (status);
   2748 	}
   2749 
   2750 	iov[0].iov_base = addr;
   2751 	iov[0].iov_len = req->r_data.io.r_nbytes;
   2752 
   2753 	if (rw)
   2754 		rc = writev(fd, iov, 1);
   2755 	else
   2756 		rc = readv(fd, iov, 1);
   2757 	status->aioid = NULL;
   2758 	status->rval = rc;
   2759 	status->err = errno;
   2760 	return (status);
   2761 }
   2762 
   2763 char *fmt_readv(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
   2764 {
   2765 	static char errbuf[32768];
   2766 	char *cp;
   2767 
   2768 	cp = errbuf;
   2769 	cp += sprintf(cp, "syscall:  %s(%d, (iov on stack), 1)\n",
   2770 		      sy->sy_name, fd);
   2771 	return (errbuf);
   2772 }
   2773 #endif /* !CRAY */
   2774 
   2775 #ifdef sgi
   2776 struct status *sy_aread(struct io_req *req, struct syscall_info *sysc, int fd,
   2777 			char *addr)
   2778 {
   2779 	struct status *sy_arw();
   2780 	return sy_arw(req, sysc, fd, addr, 0);
   2781 }
   2782 
   2783 struct status *sy_awrite(struct io_req *req, struct syscall_info *sysc, int fd,
   2784 			 char *addr)
   2785 {
   2786 	struct status *sy_arw();
   2787 	return sy_arw(req, sysc, fd, addr, 1);
   2788 }
   2789 
   2790 /*
   2791   #define sy_aread(A, B, C, D)	sy_arw(A, B, C, D, 0)
   2792   #define sy_awrite(A, B, C, D)	sy_arw(A, B, C, D, 1)
   2793  */
   2794 
   2795 struct status *sy_arw(struct io_req *req, struct syscall_info *sysc, int fd,
   2796 		      char *addr, int rw)
   2797 {
   2798 	/* POSIX 1003.1b-1993 Async read */
   2799 	struct status *status;
   2800 	int rc;
   2801 	int aio_id, aio_strat, signo;
   2802 	struct aio_info *aiop;
   2803 
   2804 	status = malloc(sizeof(struct status));
   2805 	if (status == NULL) {
   2806 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2807 			     __FILE__, __LINE__);
   2808 		return NULL;
   2809 	}
   2810 	aio_strat = req->r_data.io.r_aio_strat;
   2811 	signo = (aio_strat == A_SIGNAL) ? SIGUSR1 : 0;
   2812 
   2813 	aio_id = aio_register(fd, aio_strat, signo);
   2814 	aiop = aio_slot(aio_id);
   2815 
   2816 	memset((void *)&aiop->aiocb, 0, sizeof(aiocb_t));
   2817 
   2818 	aiop->aiocb.aio_fildes = fd;
   2819 	aiop->aiocb.aio_nbytes = req->r_data.io.r_nbytes;
   2820 	aiop->aiocb.aio_offset = req->r_data.io.r_offset;
   2821 	aiop->aiocb.aio_buf = addr;
   2822 	aiop->aiocb.aio_reqprio = 0;	/* must be 0 */
   2823 	aiop->aiocb.aio_lio_opcode = 0;
   2824 
   2825 	if (aio_strat == A_SIGNAL) {	/* siginfo(2) stuff */
   2826 		aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
   2827 		aiop->aiocb.aio_sigevent.sigev_signo = signo;
   2828 	} else if (aio_strat == A_CALLBACK) {
   2829 		aiop->aiocb.aio_sigevent.sigev_signo = 0;
   2830 		aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_CALLBACK;
   2831 		aiop->aiocb.aio_sigevent.sigev_func = cb_handler;
   2832 		aiop->aiocb.aio_sigevent.sigev_value.sival_int = aio_id;
   2833 	} else {
   2834 		aiop->aiocb.aio_sigevent.sigev_notify = SIGEV_NONE;
   2835 		aiop->aiocb.aio_sigevent.sigev_signo = 0;
   2836 	}
   2837 
   2838 	if (rw)
   2839 		rc = aio_write(&aiop->aiocb);
   2840 	else
   2841 		rc = aio_read(&aiop->aiocb);
   2842 
   2843 	status->aioid = malloc(2 * sizeof(int));
   2844 	if (status->aioid == NULL) {
   2845 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2846 			     __FILE__, __LINE__);
   2847 		return NULL;
   2848 	}
   2849 	status->aioid[0] = aio_id;
   2850 	status->aioid[1] = -1;
   2851 	status->rval = rc;
   2852 	status->err = errno;
   2853 	return (status);
   2854 }
   2855 
   2856 char *fmt_aread(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
   2857 {
   2858 	static char errbuf[32768];
   2859 	char *cp;
   2860 
   2861 	cp = errbuf;
   2862 	cp += sprintf(cp, "syscall:  %s(&aiop->aiocb)\n", sy->sy_name);
   2863 	return (errbuf);
   2864 }
   2865 #endif /* sgi */
   2866 
   2867 #ifndef CRAY
   2868 
   2869 struct status *sy_mmread(struct io_req *req, struct syscall_info *sysc, int fd,
   2870 			 char *addr)
   2871 {
   2872 	struct status *sy_mmrw();
   2873 	return sy_mmrw(req, sysc, fd, addr, 0);
   2874 }
   2875 
   2876 struct status *sy_mmwrite(struct io_req *req, struct syscall_info *sysc, int fd,
   2877 			  char *addr)
   2878 {
   2879 	struct status *sy_mmrw();
   2880 	return sy_mmrw(req, sysc, fd, addr, 1);
   2881 }
   2882 
   2883 struct status *sy_mmrw(struct io_req *req, struct syscall_info *sysc, int fd,
   2884 		       char *addr, int rw)
   2885 {
   2886 	/*
   2887 	 * mmap read/write
   2888 	 * This version is oriented towards mmaping the file to memory
   2889 	 * ONCE and keeping it mapped.
   2890 	 */
   2891 	struct status *status;
   2892 	void *mrc = NULL, *memaddr = NULL;
   2893 	struct fd_cache *fdc;
   2894 	struct stat sbuf;
   2895 	int rc;
   2896 
   2897 	status = malloc(sizeof(struct status));
   2898 	if (status == NULL) {
   2899 		doio_fprintf(stderr, "malloc failed, %s/%d\n",
   2900 			     __FILE__, __LINE__);
   2901 		return NULL;
   2902 	}
   2903 	status->aioid = NULL;
   2904 	status->rval = -1;
   2905 
   2906 	fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags);
   2907 
   2908 	if (v_opt || fdc->c_memaddr == NULL) {
   2909 		if (fstat(fd, &sbuf) < 0) {
   2910 			doio_fprintf(stderr, "fstat failed, errno=%d\n", errno);
   2911 			status->err = errno;
   2912 			return (status);
   2913 		}
   2914 
   2915 		fdc->c_memlen = (int)sbuf.st_size;
   2916 		mrc = mmap(NULL, (int)sbuf.st_size,
   2917 			   rw ? PROT_WRITE | PROT_READ : PROT_READ,
   2918 			   MAP_SHARED, fd, 0);
   2919 
   2920 		if (mrc == MAP_FAILED) {
   2921 			doio_fprintf(stderr, "mmap() failed - 0x%lx %d\n",
   2922 				     mrc, errno);
   2923 			status->err = errno;
   2924 			return (status);
   2925 		}
   2926 
   2927 		fdc->c_memaddr = mrc;
   2928 	}
   2929 
   2930 	memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset);
   2931 
   2932 	active_mmap_rw = 1;
   2933 	if (rw)
   2934 		memcpy(memaddr, addr, req->r_data.io.r_nbytes);
   2935 	else
   2936 		memcpy(addr, memaddr, req->r_data.io.r_nbytes);
   2937 	if (v_opt)
   2938 		msync(fdc->c_memaddr, (int)sbuf.st_size, MS_SYNC);
   2939 	active_mmap_rw = 0;
   2940 
   2941 	status->rval = req->r_data.io.r_nbytes;
   2942 	status->err = 0;
   2943 
   2944 	if (v_opt) {
   2945 		rc = munmap(mrc, (int)sbuf.st_size);
   2946 	}
   2947 
   2948 	return (status);
   2949 }
   2950 
   2951 char *fmt_mmrw(struct io_req *req, struct syscall_info *sy, int fd, char *addr)
   2952 {
   2953 	static char errbuf[32768];
   2954 	char *cp;
   2955 	struct fd_cache *fdc;
   2956 	void *memaddr;
   2957 
   2958 	fdc = alloc_fdcache(req->r_data.io.r_file, req->r_data.io.r_oflags);
   2959 
   2960 	cp = errbuf;
   2961 	cp += sprintf(cp, "syscall:  %s(NULL, %d, %s, MAP_SHARED, %d, 0)\n",
   2962 		      sy->sy_name,
   2963 		      fdc->c_memlen,
   2964 		      (sy->sy_flags & SY_WRITE) ? "PROT_WRITE" : "PROT_READ",
   2965 		      fd);
   2966 
   2967 	cp += sprintf(cp, "\tfile is mmaped to: 0x%lx\n",
   2968 		      (unsigned long)fdc->c_memaddr);
   2969 
   2970 	memaddr = (void *)((char *)fdc->c_memaddr + req->r_data.io.r_offset);
   2971 
   2972 	cp += sprintf(cp, "\tfile-mem=0x%lx, length=%d, buffer=0x%lx\n",
   2973 		      (unsigned long)memaddr, req->r_data.io.r_nbytes,
   2974 		      (unsigned long)addr);
   2975 
   2976 	return (errbuf);
   2977 }
   2978 #endif /* !CRAY */
   2979 
   2980 struct syscall_info syscalls[] = {
   2981 #ifdef CRAY
   2982 	{"listio-read-sync", LREAD,
   2983 	 sy_listio, NULL, fmt_listio,
   2984 	 SY_IOSW},
   2985 	{"listio-read-strides-sync", LSREAD,
   2986 	 sy_listio, listio_mem, fmt_listio,
   2987 	 SY_IOSW},
   2988 	{"listio-read-reqs-sync", LEREAD,
   2989 	 sy_listio, listio_mem, fmt_listio,
   2990 	 SY_IOSW},
   2991 	{"listio-read-async", LREADA,
   2992 	 sy_listio, NULL, fmt_listio,
   2993 	 SY_IOSW | SY_ASYNC},
   2994 	{"listio-read-strides-async", LSREADA,
   2995 	 sy_listio, listio_mem, fmt_listio,
   2996 	 SY_IOSW | SY_ASYNC},
   2997 	{"listio-read-reqs-async", LEREADA,
   2998 	 sy_listio, listio_mem, fmt_listio,
   2999 	 SY_IOSW | SY_ASYNC},
   3000 	{"listio-write-sync", LWRITE,
   3001 	 sy_listio, listio_mem, fmt_listio,
   3002 	 SY_IOSW | SY_WRITE},
   3003 	{"listio-write-strides-sync", LSWRITE,
   3004 	 sy_listio, listio_mem, fmt_listio,
   3005 	 SY_IOSW | SY_WRITE},
   3006 	{"listio-write-reqs-sync", LEWRITE,
   3007 	 sy_listio, listio_mem, fmt_listio,
   3008 	 SY_IOSW | SY_WRITE},
   3009 	{"listio-write-async", LWRITEA,
   3010 	 sy_listio, listio_mem, fmt_listio,
   3011 	 SY_IOSW | SY_WRITE | SY_ASYNC},
   3012 	{"listio-write-strides-async", LSWRITEA,
   3013 	 sy_listio, listio_mem, fmt_listio,
   3014 	 SY_IOSW | SY_WRITE | SY_ASYNC},
   3015 	{"listio-write-reqs-async", LEWRITEA,
   3016 	 sy_listio, listio_mem, fmt_listio,
   3017 	 SY_IOSW | SY_WRITE | SY_ASYNC},
   3018 #endif
   3019 
   3020 #ifdef sgi
   3021 	{"aread", AREAD,
   3022 	 sy_aread, NULL, fmt_aread,
   3023 	 SY_IOSW | SY_ASYNC},
   3024 	{"awrite", AWRITE,
   3025 	 sy_awrite, NULL, fmt_aread,
   3026 	 SY_IOSW | SY_WRITE | SY_ASYNC},
   3027 	{"pread", PREAD,
   3028 	 sy_pread, NULL, fmt_pread,
   3029 	 0},
   3030 	{"pwrite", PWRITE,
   3031 	 sy_pwrite, NULL, fmt_pread,
   3032 	 SY_WRITE},
   3033 #endif
   3034 
   3035 #ifndef CRAY
   3036 	{"readv", READV,
   3037 	 sy_readv, NULL, fmt_readv,
   3038 	 0},
   3039 	{"writev", WRITEV,
   3040 	 sy_writev, NULL, fmt_readv,
   3041 	 SY_WRITE},
   3042 	{"mmap-read", MMAPR,
   3043 	 sy_mmread, NULL, fmt_mmrw,
   3044 	 0},
   3045 	{"mmap-write", MMAPW,
   3046 	 sy_mmwrite, NULL, fmt_mmrw,
   3047 	 SY_WRITE},
   3048 #endif
   3049 
   3050 	{NULL, 0,
   3051 	 0, 0, 0,
   3052 	 0},
   3053 };
   3054 
   3055 int do_rw(struct io_req *req)
   3056 {
   3057 	static int pid = -1;
   3058 	int fd, offset, nbytes, nstrides, nents, oflags;
   3059 	int rval, mem_needed, i;
   3060 	int logged_write, got_lock, pattern;
   3061 	off_t woffset;
   3062 	int min_byte, max_byte;
   3063 	char *addr, *file, *msg;
   3064 	struct status *s;
   3065 	struct wlog_rec wrec;
   3066 	struct syscall_info *sy;
   3067 #if defined(CRAY) || defined(sgi)
   3068 	struct aio_info *aiop;
   3069 	struct iosw *iosw;
   3070 #endif
   3071 #ifdef sgi
   3072 	struct fd_cache *fdc;
   3073 #endif
   3074 
   3075 	woffset = 0;
   3076 
   3077 	/*
   3078 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
   3079 	 * r_nbytes are at the same offset in the read_req and reada_req
   3080 	 * structures.
   3081 	 */
   3082 	file = req->r_data.io.r_file;
   3083 	oflags = req->r_data.io.r_oflags;
   3084 	offset = req->r_data.io.r_offset;
   3085 	nbytes = req->r_data.io.r_nbytes;
   3086 	nstrides = req->r_data.io.r_nstrides;
   3087 	nents = req->r_data.io.r_nent;
   3088 	pattern = req->r_data.io.r_pattern;
   3089 
   3090 	if (nents >= MAX_AIO) {
   3091 		doio_fprintf(stderr,
   3092 			     "do_rw: too many list requests, %d.  Maximum is %d\n",
   3093 			     nents, MAX_AIO);
   3094 		return (-1);
   3095 	}
   3096 
   3097 	/*
   3098 	 * look up system call info
   3099 	 */
   3100 	for (sy = syscalls; sy->sy_name != NULL && sy->sy_type != req->r_type;
   3101 	     sy++) ;
   3102 
   3103 	if (sy->sy_name == NULL) {
   3104 		doio_fprintf(stderr, "do_rw: unknown r_type %d.\n",
   3105 			     req->r_type);
   3106 		return (-1);
   3107 	}
   3108 
   3109 	/*
   3110 	 * Get an open file descriptor
   3111 	 * Note: must be done before memory allocation so that the direct i/o
   3112 	 *      information is available in mem. allocate
   3113 	 */
   3114 
   3115 	if ((fd = alloc_fd(file, oflags)) == -1)
   3116 		return -1;
   3117 
   3118 	/*
   3119 	 * Allocate core memory and possibly sds space.  Initialize the
   3120 	 * data to be written.  Make sure we get enough, based on the
   3121 	 * memstride.
   3122 	 *
   3123 	 * need:
   3124 	 *      1 extra word for possible partial-word address "bump"
   3125 	 *      1 extra word for dynamic pattern overrun
   3126 	 *      MPP_BUMP extra words for T3E non-hw-aligned memory address.
   3127 	 */
   3128 
   3129 	if (sy->sy_buffer != NULL) {
   3130 		mem_needed = (*sy->sy_buffer) (req, 0, 0, NULL, NULL);
   3131 	} else {
   3132 		mem_needed = nbytes;
   3133 	}
   3134 
   3135 #ifdef CRAY
   3136 	if ((rval =
   3137 	     alloc_mem(mem_needed + wtob(1) * 2 +
   3138 		       MPP_BUMP * sizeof(UINT64_T))) < 0) {
   3139 		return rval;
   3140 	}
   3141 #else
   3142 #ifdef sgi
   3143 	/* get memory alignment for using DIRECT I/O */
   3144 	fdc = alloc_fdcache(file, oflags);
   3145 
   3146 	if ((rval = alloc_mem(mem_needed + wtob(1) * 2 + fdc->c_memalign)) < 0) {
   3147 		return rval;
   3148 	}
   3149 #else
   3150 	/* what is !CRAY && !sgi ? */
   3151 	if ((rval = alloc_mem(mem_needed + wtob(1) * 2)) < 0) {
   3152 		return rval;
   3153 	}
   3154 #endif /* sgi */
   3155 #endif /* CRAY */
   3156 
   3157 	Pattern[0] = pattern;
   3158 
   3159 	/*
   3160 	 * Allocate SDS space for backdoor write if desired
   3161 	 */
   3162 
   3163 	if (oflags & O_SSD) {
   3164 #ifdef CRAY
   3165 #ifndef _CRAYMPP
   3166 		if (alloc_sds(nbytes) == -1)
   3167 			return -1;
   3168 
   3169 		if (sy->sy_flags & SY_WRITE) {
   3170 			/*pattern_fill(Memptr, mem_needed, Pattern, Pattern_Length, 0); */
   3171 			(*Data_Fill) (Memptr, nbytes, Pattern, Pattern_Length,
   3172 				      0);
   3173 
   3174 			if (sswrite((long)Memptr, Sdsptr, btoc(mem_needed)) ==
   3175 			    -1) {
   3176 				doio_fprintf(stderr,
   3177 					     "sswrite(%d, %d, %d) failed:  %s (%d)\n",
   3178 					     (long)Memptr, Sdsptr,
   3179 					     btoc(mem_needed), SYSERR, errno);
   3180 				fflush(stderr);
   3181 				return -1;
   3182 			}
   3183 		}
   3184 
   3185 		addr = (char *)Sdsptr;
   3186 #else
   3187 		doio_fprintf(stderr,
   3188 			     "Invalid O_SSD flag was generated for MPP system\n");
   3189 		fflush(stderr);
   3190 		return -1;
   3191 #endif /* _CRAYMPP */
   3192 #else /* CRAY */
   3193 		doio_fprintf(stderr,
   3194 			     "Invalid O_SSD flag was generated for non-Cray system\n");
   3195 		fflush(stderr);
   3196 		return -1;
   3197 #endif /* CRAY */
   3198 	} else {
   3199 		addr = Memptr;
   3200 
   3201 		/*
   3202 		 * if io is not raw, bump the offset by a random amount
   3203 		 * to generate non-word-aligned io.
   3204 		 *
   3205 		 * On MPP systems, raw I/O must start on an 0x80 byte boundary.
   3206 		 * For non-aligned I/O, bump the address from 1 to 8 words.
   3207 		 */
   3208 
   3209 		if (!(req->r_data.io.r_uflags & F_WORD_ALIGNED)) {
   3210 #ifdef _CRAYMPP
   3211 			addr +=
   3212 			    random_range(0, MPP_BUMP, 1, NULL) * sizeof(int);
   3213 #endif
   3214 			addr += random_range(0, wtob(1) - 1, 1, NULL);
   3215 		}
   3216 #ifdef sgi
   3217 		/*
   3218 		 * Force memory alignment for Direct I/O
   3219 		 */
   3220 		if ((oflags & O_DIRECT) && ((long)addr % fdc->c_memalign != 0)) {
   3221 			addr +=
   3222 			    fdc->c_memalign - ((long)addr % fdc->c_memalign);
   3223 		}
   3224 #endif
   3225 
   3226 		/*
   3227 		 * FILL must be done on a word-aligned buffer.
   3228 		 * Call the fill function with Memptr which is aligned,
   3229 		 * then memmove it to the right place.
   3230 		 */
   3231 		if (sy->sy_flags & SY_WRITE) {
   3232 			(*Data_Fill) (Memptr, mem_needed, Pattern,
   3233 				      Pattern_Length, 0);
   3234 			if (addr != Memptr)
   3235 				memmove(addr, Memptr, mem_needed);
   3236 		}
   3237 	}
   3238 
   3239 	rval = 0;
   3240 	got_lock = 0;
   3241 	logged_write = 0;
   3242 
   3243 	/*
   3244 	 * Lock data if this is a write and locking option is set
   3245 	 */
   3246 	if (sy->sy_flags & SY_WRITE && k_opt) {
   3247 		if (sy->sy_buffer != NULL) {
   3248 			(*sy->sy_buffer) (req, offset, 0, &min_byte, &max_byte);
   3249 		} else {
   3250 			min_byte = offset;
   3251 			max_byte = offset + (nbytes * nstrides * nents);
   3252 		}
   3253 
   3254 		if (lock_file_region(file, fd, F_WRLCK,
   3255 				     min_byte, (max_byte - min_byte + 1)) < 0) {
   3256 			doio_fprintf(stderr,
   3257 				     "file lock failed:\n%s\n",
   3258 				     fmt_ioreq(req, sy, fd));
   3259 			doio_fprintf(stderr,
   3260 				     "          buffer(req, %d, 0, 0x%x, 0x%x)\n",
   3261 				     offset, min_byte, max_byte);
   3262 			alloc_mem(-1);
   3263 			exit(E_INTERNAL);
   3264 		}
   3265 
   3266 		got_lock = 1;
   3267 	}
   3268 
   3269 	/*
   3270 	 * Write a preliminary write-log entry.  This is done so that
   3271 	 * doio_check can do corruption detection across an interrupt/crash.
   3272 	 * Note that w_done is set to 0.  If doio_check sees this, it
   3273 	 * re-creates the file extents as if the write completed, but does not
   3274 	 * do any checking - see comments in doio_check for more details.
   3275 	 */
   3276 
   3277 	if (sy->sy_flags & SY_WRITE && w_opt) {
   3278 		if (pid == -1) {
   3279 			pid = getpid();
   3280 		}
   3281 
   3282 		wrec.w_async = (sy->sy_flags & SY_ASYNC) ? 1 : 0;
   3283 		wrec.w_oflags = oflags;
   3284 		wrec.w_pid = pid;
   3285 		wrec.w_offset = offset;
   3286 		wrec.w_nbytes = nbytes;	/* mem_needed -- total length */
   3287 
   3288 		wrec.w_pathlen = strlen(file);
   3289 		memcpy(wrec.w_path, file, wrec.w_pathlen);
   3290 		wrec.w_hostlen = strlen(Host);
   3291 		memcpy(wrec.w_host, Host, wrec.w_hostlen);
   3292 		wrec.w_patternlen = Pattern_Length;
   3293 		memcpy(wrec.w_pattern, Pattern, wrec.w_patternlen);
   3294 
   3295 		wrec.w_done = 0;
   3296 
   3297 		if ((woffset = wlog_record_write(&Wlog, &wrec, -1)) == -1) {
   3298 			doio_fprintf(stderr,
   3299 				     "Could not append to write-log:  %s (%d)\n",
   3300 				     SYSERR, errno);
   3301 		} else {
   3302 			logged_write = 1;
   3303 		}
   3304 	}
   3305 
   3306 	s = (*sy->sy_syscall) (req, sy, fd, addr);
   3307 
   3308 	if (s->rval == -1) {
   3309 		doio_fprintf(stderr,
   3310 			     "%s() request failed:  %s (%d)\n%s\n%s\n",
   3311 			     sy->sy_name, SYSERR, errno,
   3312 			     fmt_ioreq(req, sy, fd),
   3313 			     (*sy->sy_format) (req, sy, fd, addr));
   3314 
   3315 		doio_upanic(U_RVAL);
   3316 
   3317 		for (i = 0; i < nents; i++) {
   3318 			if (s->aioid == NULL)
   3319 				break;
   3320 			aio_unregister(s->aioid[i]);
   3321 		}
   3322 		rval = -1;
   3323 	} else {
   3324 		/*
   3325 		 * If the syscall was async, wait for I/O to complete
   3326 		 */
   3327 #ifndef __linux__
   3328 		if (sy->sy_flags & SY_ASYNC) {
   3329 			for (i = 0; i < nents; i++) {
   3330 				aio_wait(s->aioid[i]);
   3331 			}
   3332 		}
   3333 #endif
   3334 
   3335 		/*
   3336 		 * Check the syscall how-much-data-written return.  Look
   3337 		 * for this in either the return value or the 'iosw'
   3338 		 * structure.
   3339 		 */
   3340 
   3341 		if (sy->sy_flags & SY_IOSW) {
   3342 #ifdef CRAY
   3343 			for (i = 0; i < nents; i++) {
   3344 				if (s->aioid == NULL)
   3345 					break;	/* >>> error condition? */
   3346 				aiop = aio_slot(s->aioid[i]);
   3347 				iosw = &aiop->iosw;
   3348 				if (iosw->sw_error != 0) {
   3349 					doio_fprintf(stderr,
   3350 						     "%s() iosw error set: %s\n%s\n%s\n",
   3351 						     sy->sy_name,
   3352 						     strerror(iosw->sw_error),
   3353 						     fmt_ioreq(req, sy, fd),
   3354 						     (*sy->sy_format) (req, sy,
   3355 								       fd,
   3356 								       addr));
   3357 					doio_upanic(U_IOSW);
   3358 					rval = -1;
   3359 				} else if (iosw->sw_count != nbytes * nstrides) {
   3360 					doio_fprintf(stderr,
   3361 						     "Bad iosw from %s() #%d\nExpected (%d,%d,%d), got (%d,%d,%d)\n%s\n%s\n",
   3362 						     sy->sy_name, i,
   3363 						     1, 0, nbytes * nstrides,
   3364 						     iosw->sw_flag,
   3365 						     iosw->sw_error,
   3366 						     iosw->sw_count,
   3367 						     fmt_ioreq(req, sy, fd),
   3368 						     (*sy->sy_format) (req, sy,
   3369 								       fd,
   3370 								       addr));
   3371 					doio_upanic(U_IOSW);
   3372 					rval = -1;
   3373 				}
   3374 
   3375 				aio_unregister(s->aioid[i]);
   3376 			}
   3377 #endif /* CRAY */
   3378 #ifdef sgi
   3379 			for (i = 0; s->aioid[i] != -1; i++) {
   3380 				if (s->aioid == NULL) {
   3381 					doio_fprintf(stderr,
   3382 						     "aioid == NULL!\n");
   3383 					break;
   3384 				}
   3385 				aiop = aio_slot(s->aioid[i]);
   3386 
   3387 				/*
   3388 				 * make sure the io completed without error
   3389 				 */
   3390 				if (aiop->aio_errno != 0) {
   3391 					doio_fprintf(stderr,
   3392 						     "%s() aio error set: %s (%d)\n%s\n%s\n",
   3393 						     sy->sy_name,
   3394 						     strerror(aiop->aio_errno),
   3395 						     aiop->aio_errno,
   3396 						     fmt_ioreq(req, sy, fd),
   3397 						     (*sy->sy_format) (req, sy,
   3398 								       fd,
   3399 								       addr));
   3400 					doio_upanic(U_IOSW);
   3401 					rval = -1;
   3402 				} else if (aiop->aio_ret != nbytes) {
   3403 					doio_fprintf(stderr,
   3404 						     "Bad aio return from %s() #%d\nExpected (%d,%d), got (%d,%d)\n%s\n%s\n",
   3405 						     sy->sy_name, i,
   3406 						     0, nbytes,
   3407 						     aiop->aio_errno,
   3408 						     aiop->aio_ret,
   3409 						     fmt_ioreq(req, sy, fd),
   3410 						     (*sy->sy_format) (req, sy,
   3411 								       fd,
   3412 								       addr));
   3413 					aio_unregister(s->aioid[i]);
   3414 					doio_upanic(U_IOSW);
   3415 					return -1;
   3416 				} else {
   3417 					aio_unregister(s->aioid[i]);
   3418 					rval = 0;
   3419 				}
   3420 			}
   3421 #endif /* sgi */
   3422 		} else {
   3423 
   3424 			if (s->rval != mem_needed) {
   3425 				doio_fprintf(stderr,
   3426 					     "%s() request returned wrong # of bytes - expected %d, got %d\n%s\n%s\n",
   3427 					     sy->sy_name, nbytes, s->rval,
   3428 					     fmt_ioreq(req, sy, fd),
   3429 					     (*sy->sy_format) (req, sy, fd,
   3430 							       addr));
   3431 				rval = -1;
   3432 				doio_upanic(U_RVAL);
   3433 			}
   3434 		}
   3435 	}
   3436 
   3437 	/*
   3438 	 * Verify that the data was written correctly - check_file() returns
   3439 	 * a non-null pointer which contains an error message if there are
   3440 	 * problems.
   3441 	 */
   3442 
   3443 	if (rval == 0 && sy->sy_flags & SY_WRITE && v_opt) {
   3444 		msg = check_file(file, offset, nbytes * nstrides * nents,
   3445 				 Pattern, Pattern_Length, 0,
   3446 				 oflags & O_PARALLEL);
   3447 		if (msg != NULL) {
   3448 			doio_fprintf(stderr, "%s\n%s\n%s\n",
   3449 				     msg,
   3450 				     fmt_ioreq(req, sy, fd),
   3451 				     (*sy->sy_format) (req, sy, fd, addr));
   3452 			doio_upanic(U_CORRUPTION);
   3453 			exit(E_COMPARE);
   3454 		}
   3455 	}
   3456 
   3457 	/*
   3458 	 * General cleanup ...
   3459 	 *
   3460 	 * Write extent information to the write-log, so that doio_check can do
   3461 	 * corruption detection.  Note that w_done is set to 1, indicating that
   3462 	 * the write has been verified as complete.  We don't need to write the
   3463 	 * filename on the second logging.
   3464 	 */
   3465 
   3466 	if (w_opt && logged_write) {
   3467 		wrec.w_done = 1;
   3468 		wlog_record_write(&Wlog, &wrec, woffset);
   3469 	}
   3470 
   3471 	/*
   3472 	 * Unlock file region if necessary
   3473 	 */
   3474 
   3475 	if (got_lock) {
   3476 		if (lock_file_region(file, fd, F_UNLCK,
   3477 				     min_byte, (max_byte - min_byte + 1)) < 0) {
   3478 			alloc_mem(-1);
   3479 			exit(E_INTERNAL);
   3480 		}
   3481 	}
   3482 
   3483 	if (s->aioid != NULL)
   3484 		free(s->aioid);
   3485 	free(s);
   3486 	return (rval == -1) ? -1 : 0;
   3487 }
   3488 
   3489 /*
   3490  * fcntl-based requests
   3491  *   - F_RESVSP
   3492  *   - F_UNRESVSP
   3493  *   - F_FSYNC
   3494  */
   3495 #ifdef sgi
   3496 int do_fcntl(struct io_req *req)
   3497 {
   3498 	int fd, oflags, offset, nbytes;
   3499 	int rval, op;
   3500 	int got_lock;
   3501 	int min_byte, max_byte;
   3502 	char *file, *msg;
   3503 	struct flock flk;
   3504 
   3505 	/*
   3506 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
   3507 	 * r_nbytes are at the same offset in the read_req and reada_req
   3508 	 * structures.
   3509 	 */
   3510 	file = req->r_data.io.r_file;
   3511 	oflags = req->r_data.io.r_oflags;
   3512 	offset = req->r_data.io.r_offset;
   3513 	nbytes = req->r_data.io.r_nbytes;
   3514 
   3515 	flk.l_type = 0;
   3516 	flk.l_whence = SEEK_SET;
   3517 	flk.l_start = offset;
   3518 	flk.l_len = nbytes;
   3519 
   3520 	/*
   3521 	 * Get an open file descriptor
   3522 	 */
   3523 
   3524 	if ((fd = alloc_fd(file, oflags)) == -1)
   3525 		return -1;
   3526 
   3527 	rval = 0;
   3528 	got_lock = 0;
   3529 
   3530 	/*
   3531 	 * Lock data if this is locking option is set
   3532 	 */
   3533 	if (k_opt) {
   3534 		min_byte = offset;
   3535 		max_byte = offset + nbytes;
   3536 
   3537 		if (lock_file_region(file, fd, F_WRLCK,
   3538 				     min_byte, (nbytes + 1)) < 0) {
   3539 			doio_fprintf(stderr, "file lock failed:\n");
   3540 			doio_fprintf(stderr,
   3541 				     "          buffer(req, %d, 0, 0x%x, 0x%x)\n",
   3542 				     offset, min_byte, max_byte);
   3543 			alloc_mem(-1);
   3544 			exit(E_INTERNAL);
   3545 		}
   3546 
   3547 		got_lock = 1;
   3548 	}
   3549 
   3550 	switch (req->r_type) {
   3551 	case RESVSP:
   3552 		op = F_RESVSP;
   3553 		msg = "f_resvsp";
   3554 		break;
   3555 	case UNRESVSP:
   3556 		op = F_UNRESVSP;
   3557 		msg = "f_unresvsp";
   3558 		break;
   3559 #ifdef F_FSYNC
   3560 	case DFFSYNC:
   3561 		op = F_FSYNC;
   3562 		msg = "f_fsync";
   3563 		break;
   3564 #endif
   3565 	}
   3566 
   3567 	rval = fcntl(fd, op, &flk);
   3568 
   3569 	if (rval == -1) {
   3570 		doio_fprintf(stderr,
   3571 			     "fcntl %s request failed: %s (%d)\n\tfcntl(%d, %s %d, {%d %lld ==> %lld}\n",
   3572 			     msg, SYSERR, errno,
   3573 			     fd, msg, op, flk.l_whence,
   3574 			     (long long)flk.l_start, (long long)flk.l_len);
   3575 
   3576 		doio_upanic(U_RVAL);
   3577 		rval = -1;
   3578 	}
   3579 
   3580 	/*
   3581 	 * Unlock file region if necessary
   3582 	 */
   3583 
   3584 	if (got_lock) {
   3585 		if (lock_file_region(file, fd, F_UNLCK,
   3586 				     min_byte, (max_byte - min_byte + 1)) < 0) {
   3587 			alloc_mem(-1);
   3588 			exit(E_INTERNAL);
   3589 		}
   3590 	}
   3591 
   3592 	return (rval == -1) ? -1 : 0;
   3593 }
   3594 #endif /* sgi */
   3595 
   3596 /*
   3597  *  fsync(2) and fdatasync(2)
   3598  */
   3599 #ifndef CRAY
   3600 int do_sync(struct io_req *req)
   3601 {
   3602 	int fd, oflags;
   3603 	int rval;
   3604 	char *file;
   3605 
   3606 	/*
   3607 	 * Initialize common fields - assumes r_oflags, r_file, r_offset, and
   3608 	 * r_nbytes are at the same offset in the read_req and reada_req
   3609 	 * structures.
   3610 	 */
   3611 	file = req->r_data.io.r_file;
   3612 	oflags = req->r_data.io.r_oflags;
   3613 
   3614 	/*
   3615 	 * Get an open file descriptor
   3616 	 */
   3617 
   3618 	if ((fd = alloc_fd(file, oflags)) == -1)
   3619 		return -1;
   3620 
   3621 	rval = 0;
   3622 	switch (req->r_type) {
   3623 	case FSYNC2:
   3624 		rval = fsync(fd);
   3625 		break;
   3626 	case FDATASYNC:
   3627 		rval = fdatasync(fd);
   3628 		break;
   3629 	default:
   3630 		rval = -1;
   3631 	}
   3632 	return (rval == -1) ? -1 : 0;
   3633 }
   3634 #endif /* !CRAY */
   3635 
   3636 int
   3637 doio_pat_fill(char *addr, int mem_needed, char *Pattern, int Pattern_Length,
   3638 	      int shift)
   3639 {
   3640 	return pattern_fill(addr, mem_needed, Pattern, Pattern_Length, 0);
   3641 }
   3642 
   3643 char *doio_pat_check(char *buf, int offset, int length, char *pattern,
   3644 		     int pattern_length, int patshift)
   3645 {
   3646 	static char errbuf[4096];
   3647 	int nb, i, pattern_index;
   3648 	char *cp, *bufend, *ep;
   3649 	char actual[33], expected[33];
   3650 
   3651 	if (pattern_check(buf, length, pattern, pattern_length, patshift) != 0) {
   3652 		ep = errbuf;
   3653 		ep +=
   3654 		    sprintf(ep,
   3655 			    "Corrupt regions follow - unprintable chars are represented as '.'\n");
   3656 		ep +=
   3657 		    sprintf(ep,
   3658 			    "-----------------------------------------------------------------\n");
   3659 
   3660 		pattern_index = patshift % pattern_length;;
   3661 		cp = buf;
   3662 		bufend = buf + length;
   3663 
   3664 		while (cp < bufend) {
   3665 			if (*cp != pattern[pattern_index]) {
   3666 				nb = bufend - cp;
   3667 				if ((unsigned int)nb > sizeof(expected) - 1) {
   3668 					nb = sizeof(expected) - 1;
   3669 				}
   3670 
   3671 				ep +=
   3672 				    sprintf(ep,
   3673 					    "corrupt bytes starting at file offset %d\n",
   3674 					    offset + (int)(cp - buf));
   3675 
   3676 				/*
   3677 				 * Fill in the expected and actual patterns
   3678 				 */
   3679 				memset(expected, 0x00, sizeof(expected));
   3680 				memset(actual, 0x00, sizeof(actual));
   3681 
   3682 				for (i = 0; i < nb; i++) {
   3683 					expected[i] =
   3684 					    pattern[(pattern_index +
   3685 						     i) % pattern_length];
   3686 					if (!isprint(expected[i])) {
   3687 						expected[i] = '.';
   3688 					}
   3689 
   3690 					actual[i] = cp[i];
   3691 					if (!isprint(actual[i])) {
   3692 						actual[i] = '.';
   3693 					}
   3694 				}
   3695 
   3696 				ep +=
   3697 				    sprintf(ep,
   3698 					    "    1st %2d expected bytes:  %s\n",
   3699 					    nb, expected);
   3700 				ep +=
   3701 				    sprintf(ep,
   3702 					    "    1st %2d actual bytes:    %s\n",
   3703 					    nb, actual);
   3704 				fflush(stderr);
   3705 				return errbuf;
   3706 			} else {
   3707 				cp++;
   3708 				pattern_index++;
   3709 
   3710 				if (pattern_index == pattern_length) {
   3711 					pattern_index = 0;
   3712 				}
   3713 			}
   3714 		}
   3715 		return errbuf;
   3716 	}
   3717 
   3718 	return NULL;
   3719 }
   3720 
   3721 /*
   3722  * Check the contents of a file beginning at offset, for length bytes.  It
   3723  * is assumed that there is a string of pattern bytes in this area of the
   3724  * file.  Use normal buffered reads to do the verification.
   3725  *
   3726  * If there is a data mismatch, write a detailed message into a static buffer
   3727  * suitable for the caller to print.  Otherwise return NULL.
   3728  *
   3729  * The fsa flag is set to non-zero if the buffer should be read back through
   3730  * the FSA (unicos/mk).  This implies the file will be opened
   3731  * O_PARALLEL|O_RAW|O_WELLFORMED to do the validation.  We must do this because
   3732  * FSA will not allow the file to be opened for buffered io if it was
   3733  * previously opened for O_PARALLEL io.
   3734  */
   3735 
   3736 char *check_file(char *file, int offset, int length, char *pattern,
   3737 		 int pattern_length, int patshift, int fsa)
   3738 {
   3739 	static char errbuf[4096];
   3740 	int fd, nb, flags;
   3741 	char *buf, *em, *ep;
   3742 #ifdef sgi
   3743 	struct fd_cache *fdc;
   3744 #endif
   3745 
   3746 	buf = Memptr;
   3747 
   3748 	if (V_opt) {
   3749 		flags = Validation_Flags | O_RDONLY;
   3750 	} else {
   3751 		flags = O_RDONLY;
   3752 		if (fsa) {
   3753 #ifdef CRAY
   3754 			flags |= O_PARALLEL | O_RAW | O_WELLFORMED;
   3755 #endif
   3756 		}
   3757 	}
   3758 
   3759 	if ((fd = alloc_fd(file, flags)) == -1) {
   3760 		sprintf(errbuf,
   3761 			"Could not open file %s with flags %#o (%s) for data comparison:  %s (%d)\n",
   3762 			file, flags, format_oflags(flags), SYSERR, errno);
   3763 		return errbuf;
   3764 	}
   3765 
   3766 	if (lseek(fd, offset, SEEK_SET) == -1) {
   3767 		sprintf(errbuf,
   3768 			"Could not lseek to offset %d in %s for verification:  %s (%d)\n",
   3769 			offset, file, SYSERR, errno);
   3770 		return errbuf;
   3771 	}
   3772 #ifdef sgi
   3773 	/* Irix: Guarantee a properly aligned address on Direct I/O */
   3774 	fdc = alloc_fdcache(file, flags);
   3775 	if ((flags & O_DIRECT) && ((long)buf % fdc->c_memalign != 0)) {
   3776 		buf += fdc->c_memalign - ((long)buf % fdc->c_memalign);
   3777 	}
   3778 #endif
   3779 
   3780 	if ((nb = read(fd, buf, length)) == -1) {
   3781 #ifdef sgi
   3782 		sprintf(errbuf,
   3783 			"Could not read %d bytes from %s for verification:  %s (%d)\n\tread(%d, 0x%lx, %d)\n\tbuf %% alignment(%d) = %ld\n",
   3784 			length, file, SYSERR, errno,
   3785 			fd, buf, length,
   3786 			fdc->c_memalign, (long)buf % fdc->c_memalign);
   3787 #else
   3788 		sprintf(errbuf,
   3789 			"Could not read %d bytes from %s for verification:  %s (%d)\n",
   3790 			length, file, SYSERR, errno);
   3791 
   3792 #endif
   3793 		return errbuf;
   3794 	}
   3795 
   3796 	if (nb != length) {
   3797 		sprintf(errbuf,
   3798 			"Read wrong # bytes from %s.  Expected %d, got %d\n",
   3799 			file, length, nb);
   3800 		return errbuf;
   3801 	}
   3802 
   3803 	if ((em =
   3804 	     (*Data_Check) (buf, offset, length, pattern, pattern_length,
   3805 			    patshift)) != NULL) {
   3806 		ep = errbuf;
   3807 		ep += sprintf(ep, "*** DATA COMPARISON ERROR ***\n");
   3808 		ep +=
   3809 		    sprintf(ep, "check_file(%s, %d, %d, %s, %d, %d) failed\n\n",
   3810 			    file, offset, length, pattern, pattern_length,
   3811 			    patshift);
   3812 		ep +=
   3813 		    sprintf(ep, "Comparison fd is %d, with open flags %#o\n",
   3814 			    fd, flags);
   3815 		strcpy(ep, em);
   3816 		return (errbuf);
   3817 	}
   3818 	return NULL;
   3819 }
   3820 
   3821 /*
   3822  * Function to single-thread stdio output.
   3823  */
   3824 
   3825 int doio_fprintf(FILE * stream, char *format, ...)
   3826 {
   3827 	static int pid = -1;
   3828 	char *date;
   3829 	int rval;
   3830 	struct flock flk;
   3831 	va_list arglist;
   3832 	struct timeval ts;
   3833 	gettimeofday(&ts, NULL);
   3834 	date = hms(ts.tv_sec);
   3835 
   3836 	if (pid == -1) {
   3837 		pid = getpid();
   3838 	}
   3839 
   3840 	flk.l_whence = flk.l_start = flk.l_len = 0;
   3841 	flk.l_type = F_WRLCK;
   3842 	fcntl(fileno(stream), F_SETLKW, &flk);
   3843 
   3844 	va_start(arglist, format);
   3845 	rval = fprintf(stream, "\n%s%s (%5d) %s\n", Prog, TagName, pid, date);
   3846 	rval += fprintf(stream, "---------------------\n");
   3847 	vfprintf(stream, format, arglist);
   3848 	va_end(arglist);
   3849 
   3850 	fflush(stream);
   3851 
   3852 	flk.l_type = F_UNLCK;
   3853 	fcntl(fileno(stream), F_SETLKW, &flk);
   3854 
   3855 	return rval;
   3856 }
   3857 
   3858 /*
   3859  * Simple function for allocating core memory.  Uses Memsize and Memptr to
   3860  * keep track of the current amount allocated.
   3861  */
   3862 #ifndef CRAY
   3863 int alloc_mem(int nbytes)
   3864 {
   3865 	char *cp;
   3866 	void *addr;
   3867 	int me = 0, flags, key, shmid;
   3868 	static int mturn = 0;	/* which memory type to use */
   3869 	struct memalloc *M;
   3870 	char filename[255];
   3871 #ifdef __linux__
   3872 	struct shmid_ds shm_ds;
   3873 #endif
   3874 
   3875 #ifdef __linux__
   3876 	memset(&shm_ds, 0x00, sizeof(struct shmid_ds));
   3877 #endif
   3878 
   3879 	/* nbytes = -1 means "free all allocated memory" */
   3880 	if (nbytes == -1) {
   3881 
   3882 		for (me = 0; me < Nmemalloc; me++) {
   3883 			if (Memalloc[me].space == NULL)
   3884 				continue;
   3885 
   3886 			switch (Memalloc[me].memtype) {
   3887 			case MEM_DATA:
   3888 #ifdef sgi
   3889 				if (Memalloc[me].flags & MEMF_MPIN)
   3890 					munpin(Memalloc[me].space,
   3891 					       Memalloc[me].size);
   3892 #endif
   3893 				free(Memalloc[me].space);
   3894 				Memalloc[me].space = NULL;
   3895 				Memptr = NULL;
   3896 				Memsize = 0;
   3897 				break;
   3898 			case MEM_SHMEM:
   3899 #ifdef sgi
   3900 				if (Memalloc[me].flags & MEMF_MPIN)
   3901 					munpin(Memalloc[me].space,
   3902 					       Memalloc[me].size);
   3903 #endif
   3904 				shmdt(Memalloc[me].space);
   3905 				Memalloc[me].space = NULL;
   3906 #ifdef sgi
   3907 				shmctl(Memalloc[me].fd, IPC_RMID);
   3908 #else
   3909 				shmctl(Memalloc[me].fd, IPC_RMID, &shm_ds);
   3910 #endif
   3911 				break;
   3912 			case MEM_MMAP:
   3913 #ifdef sgi
   3914 				if (Memalloc[me].flags & MEMF_MPIN)
   3915 					munpin(Memalloc[me].space,
   3916 					       Memalloc[me].size);
   3917 #endif
   3918 				munmap(Memalloc[me].space, Memalloc[me].size);
   3919 				close(Memalloc[me].fd);
   3920 				if (Memalloc[me].flags & MEMF_FILE) {
   3921 					unlink(Memalloc[me].name);
   3922 				}
   3923 				Memalloc[me].space = NULL;
   3924 				break;
   3925 			default:
   3926 				doio_fprintf(stderr,
   3927 					     "alloc_mem: HELP! Unknown memory space type %d index %d\n",
   3928 					     Memalloc[me].memtype, me);
   3929 				break;
   3930 			}
   3931 		}
   3932 		return 0;
   3933 	}
   3934 
   3935 	/*
   3936 	 * Select a memory area (currently round-robbin)
   3937 	 */
   3938 
   3939 	if (mturn >= Nmemalloc)
   3940 		mturn = 0;
   3941 
   3942 	M = &Memalloc[mturn];
   3943 
   3944 	switch (M->memtype) {
   3945 	case MEM_DATA:
   3946 		if (nbytes > M->size) {
   3947 			if (M->space != NULL) {
   3948 #ifdef sgi
   3949 				if (M->flags & MEMF_MPIN)
   3950 					munpin(M->space, M->size);
   3951 #endif
   3952 				free(M->space);
   3953 			}
   3954 			M->space = NULL;
   3955 			M->size = 0;
   3956 		}
   3957 
   3958 		if (M->space == NULL) {
   3959 			if ((cp = malloc(nbytes)) == NULL) {
   3960 				doio_fprintf(stderr,
   3961 					     "malloc(%d) failed:  %s (%d)\n",
   3962 					     nbytes, SYSERR, errno);
   3963 				return -1;
   3964 			}
   3965 #ifdef sgi
   3966 			if (M->flags & MEMF_MPIN) {
   3967 				if (mpin(cp, nbytes) == -1) {
   3968 					doio_fprintf(stderr,
   3969 						     "mpin(0x%lx, %d) failed:  %s (%d)\n",
   3970 						     cp, nbytes, SYSERR, errno);
   3971 				}
   3972 			}
   3973 #endif
   3974 			M->space = (void *)cp;
   3975 			M->size = nbytes;
   3976 		}
   3977 		break;
   3978 
   3979 	case MEM_MMAP:
   3980 		if (nbytes > M->size) {
   3981 			if (M->space != NULL) {
   3982 #ifdef sgi
   3983 				if (M->flags & MEMF_MPIN)
   3984 					munpin(M->space, M->size);
   3985 #endif
   3986 				munmap(M->space, M->size);
   3987 				close(M->fd);
   3988 				if (M->flags & MEMF_FILE)
   3989 					unlink(M->name);
   3990 			}
   3991 			M->space = NULL;
   3992 			M->size = 0;
   3993 		}
   3994 
   3995 		if (M->space == NULL) {
   3996 			if (strchr(M->name, '%')) {
   3997 				sprintf(filename, M->name, getpid());
   3998 				M->name = strdup(filename);
   3999 			}
   4000 
   4001 			if ((M->fd =
   4002 			     open(M->name, O_CREAT | O_RDWR, 0666)) == -1) {
   4003 				doio_fprintf(stderr,
   4004 					     "alloc_mmap: error %d (%s) opening '%s'\n",
   4005 					     errno, SYSERR, M->name);
   4006 				return (-1);
   4007 			}
   4008 
   4009 			addr = NULL;
   4010 			flags = 0;
   4011 			M->size = nbytes * 4;
   4012 
   4013 			/* bias addr if MEMF_ADDR | MEMF_FIXADDR */
   4014 			/* >>> how to pick a memory address? */
   4015 
   4016 			/* bias flags on MEMF_PRIVATE etc */
   4017 			if (M->flags & MEMF_PRIVATE)
   4018 				flags |= MAP_PRIVATE;
   4019 #ifdef sgi
   4020 			if (M->flags & MEMF_LOCAL)
   4021 				flags |= MAP_LOCAL;
   4022 			if (M->flags & MEMF_AUTORESRV)
   4023 				flags |= MAP_AUTORESRV;
   4024 			if (M->flags & MEMF_AUTOGROW)
   4025 				flags |= MAP_AUTOGROW;
   4026 #endif
   4027 			if (M->flags & MEMF_SHARED)
   4028 				flags |= MAP_SHARED;
   4029 
   4030 /*printf("alloc_mem, about to mmap, fd=%d, name=(%s)\n", M->fd, M->name);*/
   4031 			if ((M->space = mmap(addr, M->size,
   4032 					     PROT_READ | PROT_WRITE,
   4033 					     flags, M->fd, 0))
   4034 			    == MAP_FAILED) {
   4035 				doio_fprintf(stderr,
   4036 					     "alloc_mem: mmap error. errno %d (%s)\n\tmmap(addr 0x%x, size %d, read|write 0x%x, mmap flags 0x%x [%#o], fd %d, 0)\n\tfile %s\n",
   4037 					     errno, SYSERR, addr, M->size,
   4038 					     PROT_READ | PROT_WRITE, flags,
   4039 					     M->flags, M->fd, M->name);
   4040 				doio_fprintf(stderr, "\t%s%s%s%s%s",
   4041 					     (flags & MAP_PRIVATE) ? "private "
   4042 					     : "",
   4043 #ifdef sgi
   4044 					     (flags & MAP_LOCAL) ? "local " :
   4045 					     "",
   4046 					     (flags & MAP_AUTORESRV) ?
   4047 					     "autoresrv " : "",
   4048 					     (flags & MAP_AUTOGROW) ?
   4049 					     "autogrow " : "",
   4050 #endif
   4051 					     (flags & MAP_SHARED) ? "shared" :
   4052 					     "");
   4053 				return (-1);
   4054 			}
   4055 		}
   4056 		break;
   4057 
   4058 	case MEM_SHMEM:
   4059 		if (nbytes > M->size) {
   4060 			if (M->space != NULL) {
   4061 #ifdef sgi
   4062 				if (M->flags & MEMF_MPIN)
   4063 					munpin(M->space, M->size);
   4064 #endif
   4065 				shmdt(M->space);
   4066 #ifdef sgi
   4067 				shmctl(M->fd, IPC_RMID);
   4068 #else
   4069 				shmctl(M->fd, IPC_RMID, &shm_ds);
   4070 #endif
   4071 			}
   4072 			M->space = NULL;
   4073 			M->size = 0;
   4074 		}
   4075 
   4076 		if (M->space == NULL) {
   4077 			if (!strcmp(M->name, "private")) {
   4078 				key = IPC_PRIVATE;
   4079 			} else {
   4080 				sscanf(M->name, "%i", &key);
   4081 			}
   4082 
   4083 			M->size = M->nblks ? M->nblks * 512 : nbytes;
   4084 
   4085 			if (nbytes > M->size) {
   4086 #ifdef DEBUG
   4087 				doio_fprintf(stderr,
   4088 					     "MEM_SHMEM: nblks(%d) too small:  nbytes=%d  Msize=%d, skipping this req.\n",
   4089 					     M->nblks, nbytes, M->size);
   4090 #endif
   4091 				return SKIP_REQ;
   4092 			}
   4093 
   4094 			shmid = shmget(key, M->size, IPC_CREAT | 0666);
   4095 			if (shmid == -1) {
   4096 				doio_fprintf(stderr,
   4097 					     "shmget(0x%x, %d, CREAT) failed: %s (%d)\n",
   4098 					     key, M->size, SYSERR, errno);
   4099 				return (-1);
   4100 			}
   4101 			M->fd = shmid;
   4102 			M->space = shmat(shmid, NULL, SHM_RND);
   4103 			if (M->space == (void *)-1) {
   4104 				doio_fprintf(stderr,
   4105 					     "shmat(0x%x, NULL, SHM_RND) failed: %s (%d)\n",
   4106 					     shmid, SYSERR, errno);
   4107 				return (-1);
   4108 			}
   4109 #ifdef sgi
   4110 			if (M->flags & MEMF_MPIN) {
   4111 				if (mpin(M->space, M->size) == -1) {
   4112 					doio_fprintf(stderr,
   4113 						     "mpin(0x%lx, %d) failed:  %s (%d)\n",
   4114 						     M->space, M->size, SYSERR,
   4115 						     errno);
   4116 				}
   4117 			}
   4118 #endif
   4119 		}
   4120 		break;
   4121 
   4122 	default:
   4123 		doio_fprintf(stderr,
   4124 			     "alloc_mem: HELP! Unknown memory space type %d index %d\n",
   4125 			     Memalloc[me].memtype, mturn);
   4126 		break;
   4127 	}
   4128 
   4129 	Memptr = M->space;
   4130 	Memsize = M->size;
   4131 
   4132 	mturn++;
   4133 	return 0;
   4134 }
   4135 #else /* CRAY */
   4136 int alloc_mem(int nbytes)
   4137 {
   4138 	char *cp;
   4139 	int ip;
   4140 	static char *malloc_space;
   4141 
   4142 	/*
   4143 	 * The "unicos" version of this did some stuff with sbrk;
   4144 	 * this caused problems with async I/O on irix, and now appears
   4145 	 * to be causing problems with FSA I/O on unicos/mk.
   4146 	 */
   4147 #ifdef NOTDEF
   4148 	if (nbytes > Memsize) {
   4149 		if ((cp = (char *)sbrk(nbytes - Memsize)) == (char *)-1) {
   4150 			doio_fprintf(stderr, "sbrk(%d) failed:  %s (%d)\n",
   4151 				     nbytes - Memsize, SYSERR, errno);
   4152 			return -1;
   4153 		}
   4154 
   4155 		if (Memsize == 0)
   4156 			Memptr = cp;
   4157 		Memsize += nbytes - Memsize;
   4158 	}
   4159 #else
   4160 
   4161 	/* nbytes = -1 means "free all allocated memory" */
   4162 	if (nbytes == -1) {
   4163 		free(malloc_space);
   4164 		Memptr = NULL;
   4165 		Memsize = 0;
   4166 		return 0;
   4167 	}
   4168 
   4169 	if (nbytes > Memsize) {
   4170 		if (Memsize != 0)
   4171 			free(malloc_space);
   4172 
   4173 		if ((cp = malloc_space = malloc(nbytes)) == NULL) {
   4174 			doio_fprintf(stderr, "malloc(%d) failed:  %s (%d)\n",
   4175 				     nbytes, SYSERR, errno);
   4176 			return -1;
   4177 		}
   4178 #ifdef _CRAYT3E
   4179 		/* T3E requires memory to be aligned on 0x40 word boundaries */
   4180 		ip = (int)cp;
   4181 		if (ip & 0x3F != 0) {
   4182 			doio_fprintf(stderr,
   4183 				     "malloc(%d) = 0x%x(0x%x) not aligned by 0x%x\n",
   4184 				     nbytes, cp, ip, ip & 0x3f);
   4185 
   4186 			free(cp);
   4187 			if ((cp = malloc_space = malloc(nbytes + 0x40)) == NULL) {
   4188 				doio_fprintf(stderr,
   4189 					     "malloc(%d) failed:  %s (%d)\n",
   4190 					     nbytes, SYSERR, errno);
   4191 				return -1;
   4192 			}
   4193 			ip = (int)cp;
   4194 			cp += (0x40 - (ip & 0x3F));
   4195 		}
   4196 #endif /* _CRAYT3E */
   4197 		Memptr = cp;
   4198 		Memsize = nbytes;
   4199 	}
   4200 #endif /* NOTDEF */
   4201 	return 0;
   4202 }
   4203 #endif /* CRAY */
   4204 
   4205 /*
   4206  * Simple function for allocating sds space.  Uses Sdssize and Sdsptr to
   4207  * keep track of location and size of currently allocated chunk.
   4208  */
   4209 
   4210 #ifdef _CRAY1
   4211 
   4212 int alloc_sds(int nbytes)
   4213 {
   4214 	int nblks;
   4215 
   4216 	if (nbytes > Sdssize) {
   4217 		if ((nblks = ssbreak(btoc(nbytes - Sdssize))) == -1) {
   4218 			doio_fprintf(stderr, "ssbreak(%d) failed:  %s (%d)\n",
   4219 				     btoc(nbytes - Sdssize), SYSERR, errno);
   4220 			return -1;
   4221 		}
   4222 
   4223 		Sdssize = ctob(nblks);
   4224 		Sdsptr = 0;
   4225 	}
   4226 
   4227 	return 0;
   4228 }
   4229 
   4230 #else
   4231 
   4232 #ifdef CRAY
   4233 
   4234 int alloc_sds(int nbytes)
   4235 {
   4236 	doio_fprintf(stderr,
   4237 		     "Internal Error - alloc_sds() called on a CRAY2 system\n");
   4238 	alloc_mem(-1);
   4239 	exit(E_INTERNAL);
   4240 }
   4241 
   4242 #endif
   4243 
   4244 #endif /* _CRAY1 */
   4245 
   4246 /*
   4247  * Function to maintain a file descriptor cache, so that doio does not have
   4248  * to do so many open() and close() calls.  Descriptors are stored in the
   4249  * cache by file name, and open flags.  Each entry also has a _rtc value
   4250  * associated with it which is used in aging.  If doio cannot open a file
   4251  * because it already has too many open (ie. system limit hit) it will close
   4252  * the one in the cache that has the oldest _rtc value.
   4253  *
   4254  * If alloc_fd() is called with a file of NULL, it will close all descriptors
   4255  * in the cache, and free the memory in the cache.
   4256  */
   4257 
   4258 int alloc_fd(char *file, int oflags)
   4259 {
   4260 	struct fd_cache *fdc;
   4261 	struct fd_cache *alloc_fdcache(char *file, int oflags);
   4262 
   4263 	fdc = alloc_fdcache(file, oflags);
   4264 	if (fdc != NULL)
   4265 		return (fdc->c_fd);
   4266 	else
   4267 		return (-1);
   4268 }
   4269 
   4270 struct fd_cache *alloc_fdcache(char *file, int oflags)
   4271 {
   4272 	int fd;
   4273 	struct fd_cache *free_slot, *oldest_slot, *cp;
   4274 	static int cache_size = 0;
   4275 	static struct fd_cache *cache = NULL;
   4276 #ifdef sgi
   4277 	struct dioattr finfo;
   4278 #endif
   4279 
   4280 	/*
   4281 	 * If file is NULL, it means to free up the fd cache.
   4282 	 */
   4283 
   4284 	if (file == NULL && cache != NULL) {
   4285 		for (cp = cache; cp < &cache[cache_size]; cp++) {
   4286 			if (cp->c_fd != -1) {
   4287 				close(cp->c_fd);
   4288 			}
   4289 #ifndef CRAY
   4290 			if (cp->c_memaddr != NULL) {
   4291 				munmap(cp->c_memaddr, cp->c_memlen);
   4292 			}
   4293 #endif
   4294 		}
   4295 
   4296 		free(cache);
   4297 		cache = NULL;
   4298 		cache_size = 0;
   4299 		return 0;
   4300 	}
   4301 
   4302 	free_slot = NULL;
   4303 	oldest_slot = NULL;
   4304 
   4305 	/*
   4306 	 * Look for a fd in the cache.  If one is found, return it directly.
   4307 	 * Otherwise, when this loop exits, oldest_slot will point to the
   4308 	 * oldest fd slot in the cache, and free_slot will point to an
   4309 	 * unoccupied slot if there are any.
   4310 	 */
   4311 
   4312 	for (cp = cache; cp != NULL && cp < &cache[cache_size]; cp++) {
   4313 		if (cp->c_fd != -1 &&
   4314 		    cp->c_oflags == oflags && strcmp(cp->c_file, file) == 0) {
   4315 #ifdef CRAY
   4316 			cp->c_rtc = _rtc();
   4317 #else
   4318 			cp->c_rtc = Reqno;
   4319 #endif
   4320 			return cp;
   4321 		}
   4322 
   4323 		if (cp->c_fd == -1) {
   4324 			if (free_slot == NULL) {
   4325 				free_slot = cp;
   4326 			}
   4327 		} else {
   4328 			if (oldest_slot == NULL ||
   4329 			    cp->c_rtc < oldest_slot->c_rtc) {
   4330 				oldest_slot = cp;
   4331 			}
   4332 		}
   4333 	}
   4334 
   4335 	/*
   4336 	 * No matching file/oflags pair was found in the cache.  Attempt to
   4337 	 * open a new fd.
   4338 	 */
   4339 
   4340 	if ((fd = open(file, oflags, 0666)) < 0) {
   4341 		if (errno != EMFILE) {
   4342 			doio_fprintf(stderr,
   4343 				     "Could not open file %s with flags %#o (%s): %s (%d)\n",
   4344 				     file, oflags, format_oflags(oflags),
   4345 				     SYSERR, errno);
   4346 			alloc_mem(-1);
   4347 			exit(E_SETUP);
   4348 		}
   4349 
   4350 		/*
   4351 		 * If we get here, we have as many open fd's as we can have.
   4352 		 * Close the oldest one in the cache (pointed to by
   4353 		 * oldest_slot), and attempt to re-open.
   4354 		 */
   4355 
   4356 		close(oldest_slot->c_fd);
   4357 		oldest_slot->c_fd = -1;
   4358 		free_slot = oldest_slot;
   4359 
   4360 		if ((fd = open(file, oflags, 0666)) < 0) {
   4361 			doio_fprintf(stderr,
   4362 				     "Could not open file %s with flags %#o (%s):  %s (%d)\n",
   4363 				     file, oflags, format_oflags(oflags),
   4364 				     SYSERR, errno);
   4365 			alloc_mem(-1);
   4366 			exit(E_SETUP);
   4367 		}
   4368 	}
   4369 
   4370 /*printf("alloc_fd: new file %s flags %#o fd %d\n", file, oflags, fd);*/
   4371 
   4372 	/*
   4373 	 * If we get here, fd is our open descriptor.  If free_slot is NULL,
   4374 	 * we need to grow the cache, otherwise free_slot is the slot that
   4375 	 * should hold the fd info.
   4376 	 */
   4377 
   4378 	if (free_slot == NULL) {
   4379 		cache =
   4380 		    (struct fd_cache *)realloc(cache,
   4381 					       sizeof(struct fd_cache) *
   4382 					       (FD_ALLOC_INCR + cache_size));
   4383 		if (cache == NULL) {
   4384 			doio_fprintf(stderr,
   4385 				     "Could not malloc() space for fd chace");
   4386 			alloc_mem(-1);
   4387 			exit(E_SETUP);
   4388 		}
   4389 
   4390 		cache_size += FD_ALLOC_INCR;
   4391 
   4392 		for (cp = &cache[cache_size - FD_ALLOC_INCR];
   4393 		     cp < &cache[cache_size]; cp++) {
   4394 			cp->c_fd = -1;
   4395 		}
   4396 
   4397 		free_slot = &cache[cache_size - FD_ALLOC_INCR];
   4398 	}
   4399 
   4400 	/*
   4401 	 * finally, fill in the cache slot info
   4402 	 */
   4403 
   4404 	free_slot->c_fd = fd;
   4405 	free_slot->c_oflags = oflags;
   4406 	strcpy(free_slot->c_file, file);
   4407 #ifdef CRAY
   4408 	free_slot->c_rtc = _rtc();
   4409 #else
   4410 	free_slot->c_rtc = Reqno;
   4411 #endif
   4412 
   4413 #ifdef sgi
   4414 	if (oflags & O_DIRECT) {
   4415 		if (fcntl(fd, F_DIOINFO, &finfo) == -1) {
   4416 			finfo.d_mem = 1;
   4417 			finfo.d_miniosz = 1;
   4418 			finfo.d_maxiosz = 1;
   4419 		}
   4420 	} else {
   4421 		finfo.d_mem = 1;
   4422 		finfo.d_miniosz = 1;
   4423 		finfo.d_maxiosz = 1;
   4424 	}
   4425 
   4426 	free_slot->c_memalign = finfo.d_mem;
   4427 	free_slot->c_miniosz = finfo.d_miniosz;
   4428 	free_slot->c_maxiosz = finfo.d_maxiosz;
   4429 #endif /* sgi */
   4430 #ifndef CRAY
   4431 	free_slot->c_memaddr = NULL;
   4432 	free_slot->c_memlen = 0;
   4433 #endif
   4434 
   4435 	return free_slot;
   4436 }
   4437 
   4438 /*
   4439  *
   4440  *			Signal Handling Section
   4441  *
   4442  *
   4443  */
   4444 
   4445 #ifdef sgi
   4446 /*
   4447  * "caller-id" for signals
   4448  */
   4449 void signal_info(int sig, siginfo_t * info, void *v)
   4450 {
   4451 	int haveit = 0;
   4452 
   4453 	if (info != NULL) {
   4454 		switch (info->si_code) {
   4455 		case SI_USER:
   4456 			doio_fprintf(stderr,
   4457 				     "signal_info: si_signo %d si_errno %d si_code SI_USER pid %d uid %d\n",
   4458 				     info->si_signo, info->si_errno,
   4459 				     info->si_pid, info->si_uid);
   4460 			haveit = 1;
   4461 			break;
   4462 
   4463 		case SI_QUEUE:
   4464 			doio_fprintf(stderr,
   4465 				     "signal_info  si_signo %d si_code = SI_QUEUE\n",
   4466 				     info->si_signo);
   4467 			haveit = 1;
   4468 			break;
   4469 		}
   4470 
   4471 		if (!haveit) {
   4472 			if ((info->si_signo == SIGSEGV) ||
   4473 			    (info->si_signo == SIGBUS)) {
   4474 				doio_fprintf(stderr,
   4475 					     "signal_info  si_signo %d si_errno %d si_code = %d  si_addr=%p  active_mmap_rw=%d havesigint=%d\n",
   4476 					     info->si_signo, info->si_errno,
   4477 					     info->si_code, info->si_addr,
   4478 					     active_mmap_rw, havesigint);
   4479 				haveit = 1;
   4480 			}
   4481 		}
   4482 
   4483 		if (!haveit) {
   4484 			doio_fprintf(stderr,
   4485 				     "signal_info: si_signo %d si_errno %d unknown code %d\n",
   4486 				     info->si_signo, info->si_errno,
   4487 				     info->si_code);
   4488 		}
   4489 	} else {
   4490 		doio_fprintf(stderr, "signal_info: sig %d\n", sig);
   4491 	}
   4492 }
   4493 
   4494 void cleanup_handler(int sig, siginfo_t * info, void *v)
   4495 {
   4496 	havesigint = 1;		/* in case there's a followup signal */
   4497 	/*signal_info(sig, info, v); *//* be quiet on "normal" kill */
   4498 	alloc_mem(-1);
   4499 	exit(0);
   4500 }
   4501 
   4502 void die_handler(int sig, siginfo_t * info, void *v)
   4503 {
   4504 	doio_fprintf(stderr, "terminating on signal %d\n", sig);
   4505 	signal_info(sig, info, v);
   4506 	alloc_mem(-1);
   4507 	exit(1);
   4508 }
   4509 
   4510 void sigbus_handler(int sig, siginfo_t * info, void *v)
   4511 {
   4512 	/* While we are doing a memcpy to/from an mmapped region we can
   4513 	   get a SIGBUS for a variety of reasons--and not all of them
   4514 	   should be considered failures.
   4515 
   4516 	   Under normal conditions if we get a SIGINT it means we've been
   4517 	   told to shutdown.  However, if we're currently doing the above-
   4518 	   mentioned memcopy then the kernel will follow that SIGINT with
   4519 	   a SIGBUS.  We can guess that we're in this situation by seeing
   4520 	   that the si_errno field in the siginfo structure has EINTR as
   4521 	   an errno.  (We might make the guess stronger by looking at the
   4522 	   si_addr field to see that it's not faulting off the end of the
   4523 	   mmapped region, but it seems that in such a case havesigint
   4524 	   would not have been set so maybe that doesn't make the guess
   4525 	   stronger.)
   4526 	 */
   4527 
   4528 	if (active_mmap_rw && havesigint && (info->si_errno == EINTR)) {
   4529 		cleanup_handler(sig, info, v);
   4530 	} else {
   4531 		die_handler(sig, info, v);
   4532 	}
   4533 }
   4534 #else
   4535 
   4536 void cleanup_handler(int sig)
   4537 {
   4538 	havesigint = 1;		/* in case there's a followup signal */
   4539 	alloc_mem(-1);
   4540 	exit(0);
   4541 }
   4542 
   4543 void die_handler(int sig)
   4544 {
   4545 	doio_fprintf(stderr, "terminating on signal %d\n", sig);
   4546 	alloc_mem(-1);
   4547 	exit(1);
   4548 }
   4549 
   4550 #ifndef CRAY
   4551 void sigbus_handler(int sig)
   4552 {
   4553 	/* See sigbus_handler() in the 'ifdef sgi' case for details.  Here,
   4554 	   we don't have the siginfo stuff so the guess is weaker but we'll
   4555 	   do it anyway.
   4556 	 */
   4557 
   4558 	if (active_mmap_rw && havesigint)
   4559 		cleanup_handler(sig);
   4560 	else
   4561 		die_handler(sig);
   4562 }
   4563 #endif /* !CRAY */
   4564 #endif /* sgi */
   4565 
   4566 void noop_handler(int sig)
   4567 {
   4568 	return;
   4569 }
   4570 
   4571 /*
   4572  * SIGINT handler for the parent (original doio) process.  It simply sends
   4573  * a SIGINT to all of the doio children.  Since they're all in the same
   4574  * pgrp, this can be done with a single kill().
   4575  */
   4576 
   4577 void sigint_handler(int sig)
   4578 {
   4579 	int i;
   4580 
   4581 	for (i = 0; i < Nchildren; i++) {
   4582 		if (Children[i] != -1) {
   4583 			kill(Children[i], SIGINT);
   4584 		}
   4585 	}
   4586 }
   4587 
   4588 /*
   4589  * Signal handler used to inform a process when async io completes.  Referenced
   4590  * in do_read() and do_write().  Note that the signal handler is not
   4591  * re-registered.
   4592  */
   4593 
   4594 void aio_handler(int sig)
   4595 {
   4596 	unsigned int i;
   4597 	struct aio_info *aiop;
   4598 
   4599 	for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
   4600 		aiop = &Aio_Info[i];
   4601 
   4602 		if (aiop->strategy == A_SIGNAL && aiop->sig == sig) {
   4603 			aiop->signalled++;
   4604 
   4605 			if (aio_done(aiop)) {
   4606 				aiop->done++;
   4607 			}
   4608 		}
   4609 	}
   4610 }
   4611 
   4612 /*
   4613  * dump info on all open aio slots
   4614  */
   4615 void dump_aio(void)
   4616 {
   4617 	unsigned int i, count;
   4618 
   4619 	count = 0;
   4620 	for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
   4621 		if (Aio_Info[i].busy) {
   4622 			count++;
   4623 			fprintf(stderr,
   4624 				"Aio_Info[%03d] id=%d fd=%d signal=%d signaled=%d\n",
   4625 				i, Aio_Info[i].id,
   4626 				Aio_Info[i].fd,
   4627 				Aio_Info[i].sig, Aio_Info[i].signalled);
   4628 			fprintf(stderr, "\tstrategy=%s\n",
   4629 				format_strat(Aio_Info[i].strategy));
   4630 		}
   4631 	}
   4632 	fprintf(stderr, "%d active async i/os\n", count);
   4633 }
   4634 
   4635 #ifdef sgi
   4636 /*
   4637  * Signal handler called as a callback, not as a signal.
   4638  * 'val' is the value from sigev_value and is assumed to be the
   4639  * Aio_Info[] index.
   4640  */
   4641 void cb_handler(sigval_t val)
   4642 {
   4643 	struct aio_info *aiop;
   4644 
   4645 /*printf("cb_handler requesting slot %d\n", val.sival_int);*/
   4646 	aiop = aio_slot(val.sival_int);
   4647 /*printf("cb_handler, aiop=%p\n", aiop);*/
   4648 
   4649 /*printf("%d in cb_handler\n", getpid() );*/
   4650 	if (aiop->strategy == A_CALLBACK) {
   4651 		aiop->signalled++;
   4652 
   4653 		if (aio_done(aiop)) {
   4654 			aiop->done++;
   4655 		}
   4656 	}
   4657 }
   4658 #endif
   4659 
   4660 struct aio_info *aio_slot(int aio_id)
   4661 {
   4662 	unsigned int i;
   4663 	static int id = 1;
   4664 	struct aio_info *aiop;
   4665 
   4666 	aiop = NULL;
   4667 
   4668 	for (i = 0; i < sizeof(Aio_Info) / sizeof(Aio_Info[0]); i++) {
   4669 		if (aio_id == -1) {
   4670 			if (!Aio_Info[i].busy) {
   4671 				aiop = &Aio_Info[i];
   4672 				aiop->busy = 1;
   4673 				aiop->id = id++;
   4674 				break;
   4675 			}
   4676 		} else {
   4677 			if (Aio_Info[i].busy && Aio_Info[i].id == aio_id) {
   4678 				aiop = &Aio_Info[i];
   4679 				break;
   4680 			}
   4681 		}
   4682 	}
   4683 
   4684 	if (aiop == NULL) {
   4685 		doio_fprintf(stderr, "aio_slot(%d) not found.  Request %d\n",
   4686 			     aio_id, Reqno);
   4687 		dump_aio();
   4688 		alloc_mem(-1);
   4689 		exit(E_INTERNAL);
   4690 	}
   4691 
   4692 	return aiop;
   4693 }
   4694 
   4695 int aio_register(int fd, int strategy, int sig)
   4696 {
   4697 	struct aio_info *aiop;
   4698 	struct sigaction sa;
   4699 
   4700 	aiop = aio_slot(-1);
   4701 
   4702 	aiop->fd = fd;
   4703 	aiop->strategy = strategy;
   4704 	aiop->done = 0;
   4705 #ifdef CRAY
   4706 	memset((char *)&aiop->iosw, 0x00, sizeof(aiop->iosw));
   4707 #endif
   4708 
   4709 	if (strategy == A_SIGNAL) {
   4710 		aiop->sig = sig;
   4711 		aiop->signalled = 0;
   4712 
   4713 		sa.sa_handler = aio_handler;
   4714 		sa.sa_flags = 0;
   4715 		sigemptyset(&sa.sa_mask);
   4716 
   4717 		sigaction(sig, &sa, &aiop->osa);
   4718 	} else {
   4719 		aiop->sig = -1;
   4720 		aiop->signalled = 0;
   4721 	}
   4722 
   4723 	return aiop->id;
   4724 }
   4725 
   4726 int aio_unregister(int aio_id)
   4727 {
   4728 	struct aio_info *aiop;
   4729 
   4730 	aiop = aio_slot(aio_id);
   4731 
   4732 	if (aiop->strategy == A_SIGNAL) {
   4733 		sigaction(aiop->sig, &aiop->osa, NULL);
   4734 	}
   4735 
   4736 	aiop->busy = 0;
   4737 	return 0;
   4738 }
   4739 
   4740 #ifndef __linux__
   4741 int aio_wait(int aio_id)
   4742 {
   4743 #ifdef RECALL_SIZEOF
   4744 	long mask[RECALL_SIZEOF];
   4745 #endif
   4746 	sigset_t signalset;
   4747 	struct aio_info *aiop;
   4748 #ifdef CRAY
   4749 	struct iosw *ioswlist[1];
   4750 #endif
   4751 #ifdef sgi
   4752 	const aiocb_t *aioary[1];
   4753 #endif
   4754 	int r, cnt;
   4755 
   4756 	aiop = aio_slot(aio_id);
   4757 /*printf("%d aiop B =%p\n", getpid(), aiop);*/
   4758 
   4759 	switch (aiop->strategy) {
   4760 	case A_POLL:
   4761 		while (!aio_done(aiop)) ;
   4762 		break;
   4763 
   4764 	case A_SIGNAL:
   4765 		sigemptyset(&signalset);
   4766 		sighold(aiop->sig);
   4767 
   4768 		while (!aiop->signalled || !aiop->done) {
   4769 			sigsuspend(&signalset);
   4770 			sighold(aiop->sig);
   4771 		}
   4772 		break;
   4773 
   4774 #ifdef CRAY
   4775 	case A_RECALL:
   4776 		ioswlist[0] = &aiop->iosw;
   4777 		if (recall(aiop->fd, 1, ioswlist) < 0) {
   4778 			doio_fprintf(stderr, "recall() failed:  %s (%d)\n",
   4779 				     SYSERR, errno);
   4780 			exit(E_SETUP);
   4781 		}
   4782 		break;
   4783 
   4784 #ifdef RECALL_SIZEOF
   4785 
   4786 	case A_RECALLA:
   4787 		RECALL_INIT(mask);
   4788 		RECALL_SET(mask, aiop->fd);
   4789 		if (recalla(mask) < 0) {
   4790 			doio_fprintf(stderr, "recalla() failed:  %s (%d)\n",
   4791 				     SYSERR, errno);
   4792 			exit(E_SETUP);
   4793 		}
   4794 
   4795 		RECALL_CLR(mask, aiop->fd);
   4796 		break;
   4797 #endif
   4798 
   4799 	case A_RECALLS:
   4800 		ioswlist[0] = &aiop->iosw;
   4801 		if (recalls(1, ioswlist) < 0) {
   4802 			doio_fprintf(stderr, "recalls failed:  %s (%d)\n",
   4803 				     SYSERR, errno);
   4804 			exit(E_SETUP);
   4805 		}
   4806 		break;
   4807 #endif /* CRAY */
   4808 
   4809 #ifdef sgi
   4810 	case A_CALLBACK:
   4811 		aioary[0] = &aiop->aiocb;
   4812 		cnt = 0;
   4813 		do {
   4814 			r = aio_suspend(aioary, 1, NULL);
   4815 			if (r == -1) {
   4816 				doio_fprintf(stderr,
   4817 					     "aio_suspend failed: %s (%d)\n",
   4818 					     SYSERR, errno);
   4819 				exit(E_SETUP);
   4820 			}
   4821 			cnt++;
   4822 		} while (aiop->done == 0);
   4823 
   4824 #if 0
   4825 		/*
   4826 		 * after having this set for a while, I've decided that
   4827 		 * it's too noisy
   4828 		 */
   4829 		if (cnt > 1)
   4830 			doio_fprintf(stderr,
   4831 				     "aio_wait: callback wait took %d tries\n",
   4832 				     cnt);
   4833 #endif
   4834 
   4835 		/*
   4836 		 * Note: cb_handler already calls aio_done
   4837 		 */
   4838 		break;
   4839 
   4840 	case A_SUSPEND:
   4841 		aioary[0] = &aiop->aiocb;
   4842 		r = aio_suspend(aioary, 1, NULL);
   4843 		if (r == -1) {
   4844 			doio_fprintf(stderr, "aio_suspend failed: %s (%d)\n",
   4845 				     SYSERR, errno);
   4846 			exit(E_SETUP);
   4847 		}
   4848 
   4849 		aio_done(aiop);
   4850 		break;
   4851 #endif
   4852 	}
   4853 
   4854 /*printf("aio_wait: errno %d return %d\n", aiop->aio_errno, aiop->aio_ret);*/
   4855 
   4856 	return 0;
   4857 }
   4858 #endif /* !linux */
   4859 
   4860 /*
   4861  * Format specified time into HH:MM:SS format.  t is the time to format
   4862  * in seconds (as returned from time(2)).
   4863  */
   4864 
   4865 char *hms(time_t t)
   4866 {
   4867 	static char ascii_time[9];
   4868 	struct tm *ltime;
   4869 
   4870 	ltime = localtime(&t);
   4871 	strftime(ascii_time, sizeof(ascii_time), "%H:%M:%S", ltime);
   4872 
   4873 	return ascii_time;
   4874 }
   4875 
   4876 /*
   4877  * Simple routine to check if an async io request has completed.
   4878  */
   4879 
   4880 int aio_done(struct aio_info *ainfo)
   4881 {
   4882 #ifdef CRAY
   4883 	return ainfo->iosw.sw_flag;
   4884 #endif
   4885 
   4886 #ifdef sgi
   4887 	if ((ainfo->aio_errno = aio_error(&ainfo->aiocb)) == -1) {
   4888 		doio_fprintf(stderr, "aio_done: aio_error failed: %s (%d)\n",
   4889 			     SYSERR, errno);
   4890 		exit(E_SETUP);
   4891 	}
   4892 	/*printf("%d aio_done aio_errno=%d\n", getpid(), ainfo->aio_errno); */
   4893 	if (ainfo->aio_errno != EINPROGRESS) {
   4894 		if ((ainfo->aio_ret = aio_return(&ainfo->aiocb)) == -1) {
   4895 			doio_fprintf(stderr,
   4896 				     "aio_done: aio_return failed: %s (%d)\n",
   4897 				     SYSERR, errno);
   4898 			exit(E_SETUP);
   4899 		}
   4900 	}
   4901 
   4902 	return (ainfo->aio_errno != EINPROGRESS);
   4903 #else
   4904 	return -1;		/* invalid */
   4905 #endif
   4906 }
   4907 
   4908 /*
   4909  * Routine to handle upanic() - it first attempts to set the panic flag.  If
   4910  * the flag cannot be set, an error message is issued.  A call to upanic
   4911  * with PA_PANIC is then done unconditionally, in case the panic flag was set
   4912  * from outside the program (as with the panic(8) program).
   4913  *
   4914  * Note - we only execute the upanic code if -U was used, and the passed in
   4915  * mask is set in the Upanic_Conditions bitmask.
   4916  */
   4917 
   4918 void doio_upanic(int mask)
   4919 {
   4920 	if (U_opt == 0 || (mask & Upanic_Conditions) == 0) {
   4921 		return;
   4922 	}
   4923 #ifdef CRAY
   4924 	if (upanic(PA_SET) < 0) {
   4925 		doio_fprintf(stderr,
   4926 			     "WARNING - Could not set the panic flag - upanic(PA_SET) failed:  %s (%d)\n",
   4927 			     SYSERR, errno);
   4928 	}
   4929 
   4930 	upanic(PA_PANIC);
   4931 #endif
   4932 #ifdef sgi
   4933 	syssgi(1005);		/* syssgi test panic - DEBUG kernels only */
   4934 #endif
   4935 	doio_fprintf(stderr, "WARNING - upanic() failed\n");
   4936 }
   4937 
   4938 /*
   4939  * Parse cmdline options/arguments and set appropriate global variables.
   4940  * If the cmdline is valid, return 0 to caller.  Otherwise exit with a status
   4941  * of 1.
   4942  */
   4943 
   4944 int parse_cmdline(int argc, char **argv, char *opts)
   4945 {
   4946 	int c;
   4947 	char cc, *cp = NULL, *tok = NULL;
   4948 	extern int opterr;
   4949 	extern int optind;
   4950 	extern char *optarg;
   4951 	struct smap *s;
   4952 	char *memargs[NMEMALLOC];
   4953 	int nmemargs, ma;
   4954 
   4955 	if (*argv[0] == '-') {
   4956 		argv[0]++;
   4957 		Execd = 1;
   4958 	}
   4959 
   4960 	if ((Prog = strrchr(argv[0], '/')) == NULL) {
   4961 		Prog = argv[0];
   4962 	} else {
   4963 		Prog++;
   4964 	}
   4965 
   4966 	opterr = 0;
   4967 	while ((c = getopt(argc, argv, opts)) != EOF) {
   4968 		switch ((char)c) {
   4969 		case 'a':
   4970 			a_opt++;
   4971 			break;
   4972 
   4973 		case 'C':
   4974 			C_opt++;
   4975 			for (s = checkmap; s->string != NULL; s++)
   4976 				if (!strcmp(s->string, optarg))
   4977 					break;
   4978 			if (s->string == NULL && tok != NULL) {
   4979 				fprintf(stderr,
   4980 					"%s%s:  Illegal -C arg (%s).  Must be one of: ",
   4981 					Prog, TagName, tok);
   4982 
   4983 				for (s = checkmap; s->string != NULL; s++)
   4984 					fprintf(stderr, "%s ", s->string);
   4985 				fprintf(stderr, "\n");
   4986 				exit(1);
   4987 			}
   4988 
   4989 			switch (s->value) {
   4990 			case C_DEFAULT:
   4991 				Data_Fill = doio_pat_fill;
   4992 				Data_Check = doio_pat_check;
   4993 				break;
   4994 			default:
   4995 				fprintf(stderr,
   4996 					"%s%s:  Unrecognised -C arg '%s' %d",
   4997 					Prog, TagName, s->string, s->value);
   4998 				exit(1);
   4999 			}
   5000 			break;
   5001 
   5002 		case 'd':	/* delay between i/o ops */
   5003 			parse_delay(optarg);
   5004 			break;
   5005 
   5006 		case 'e':
   5007 			if (Npes > 1 && Nprocs > 1) {
   5008 				fprintf(stderr,
   5009 					"%s%s:  Warning - Program is a multi-pe application - exec option is ignored.\n",
   5010 					Prog, TagName);
   5011 			}
   5012 			e_opt++;
   5013 			break;
   5014 
   5015 		case 'h':
   5016 			help(stdout);
   5017 			exit(0);
   5018 			break;
   5019 
   5020 		case 'k':
   5021 			k_opt++;
   5022 			break;
   5023 
   5024 		case 'm':
   5025 			Message_Interval = strtol(optarg, &cp, 10);
   5026 			if (*cp != '\0' || Message_Interval < 0) {
   5027 				fprintf(stderr,
   5028 					"%s%s:  Illegal -m arg (%s):  Must be an integer >= 0\n",
   5029 					Prog, TagName, optarg);
   5030 				exit(1);
   5031 			}
   5032 			m_opt++;
   5033 			break;
   5034 
   5035 		case 'M':	/* memory allocation types */
   5036 #ifndef CRAY
   5037 			nmemargs = string_to_tokens(optarg, memargs, 32, ",");
   5038 			for (ma = 0; ma < nmemargs; ma++) {
   5039 				parse_memalloc(memargs[ma]);
   5040 			}
   5041 			/*dump_memalloc(); */
   5042 #else
   5043 			fprintf(stderr,
   5044 				"%s%s: Error: -M isn't supported on this platform\n",
   5045 				Prog, TagName);
   5046 			exit(1);
   5047 #endif
   5048 			M_opt++;
   5049 			break;
   5050 
   5051 		case 'N':
   5052 			sprintf(TagName, "(%.39s)", optarg);
   5053 			break;
   5054 
   5055 		case 'n':
   5056 			Nprocs = strtol(optarg, &cp, 10);
   5057 			if (*cp != '\0' || Nprocs < 1) {
   5058 				fprintf(stderr,
   5059 					"%s%s:  Illegal -n arg (%s):  Must be integer > 0\n",
   5060 					Prog, TagName, optarg);
   5061 				exit(E_USAGE);
   5062 			}
   5063 
   5064 			if (Npes > 1 && Nprocs > 1) {
   5065 				fprintf(stderr,
   5066 					"%s%s:  Program has been built as a multi-pe app.  -n1 is the only nprocs value allowed\n",
   5067 					Prog, TagName);
   5068 				exit(E_SETUP);
   5069 			}
   5070 			n_opt++;
   5071 			break;
   5072 
   5073 		case 'r':
   5074 			Release_Interval = strtol(optarg, &cp, 10);
   5075 			if (*cp != '\0' || Release_Interval < 0) {
   5076 				fprintf(stderr,
   5077 					"%s%s:  Illegal -r arg (%s):  Must be integer >= 0\n",
   5078 					Prog, TagName, optarg);
   5079 				exit(E_USAGE);
   5080 			}
   5081 
   5082 			r_opt++;
   5083 			break;
   5084 
   5085 		case 'w':
   5086 			Write_Log = optarg;
   5087 			w_opt++;
   5088 			break;
   5089 
   5090 		case 'v':
   5091 			v_opt++;
   5092 			break;
   5093 
   5094 		case 'V':
   5095 			if (strcasecmp(optarg, "sync") == 0) {
   5096 				Validation_Flags = O_SYNC;
   5097 			} else if (strcasecmp(optarg, "buffered") == 0) {
   5098 				Validation_Flags = 0;
   5099 #ifdef CRAY
   5100 			} else if (strcasecmp(optarg, "parallel") == 0) {
   5101 				Validation_Flags = O_PARALLEL;
   5102 			} else if (strcasecmp(optarg, "ldraw") == 0) {
   5103 				Validation_Flags = O_LDRAW;
   5104 			} else if (strcasecmp(optarg, "raw") == 0) {
   5105 				Validation_Flags = O_RAW;
   5106 #endif
   5107 #ifdef sgi
   5108 			} else if (strcasecmp(optarg, "direct") == 0) {
   5109 				Validation_Flags = O_DIRECT;
   5110 #endif
   5111 			} else {
   5112 				if (sscanf
   5113 				    (optarg, "%i%c", &Validation_Flags,
   5114 				     &cc) != 1) {
   5115 					fprintf(stderr,
   5116 						"%s:  Invalid -V argument (%s) - must be a decimal, hex, or octal\n",
   5117 						Prog, optarg);
   5118 					fprintf(stderr,
   5119 						"    number, or one of the following strings:  'sync',\n");
   5120 					fprintf(stderr,
   5121 						"    'buffered', 'parallel', 'ldraw', or 'raw'\n");
   5122 					exit(E_USAGE);
   5123 				}
   5124 			}
   5125 			V_opt++;
   5126 			break;
   5127 		case 'U':
   5128 			tok = strtok(optarg, ",");
   5129 			while (tok != NULL) {
   5130 				for (s = Upanic_Args; s->string != NULL; s++)
   5131 					if (strcmp(s->string, tok) == 0)
   5132 						break;
   5133 
   5134 				if (s->string == NULL) {
   5135 					fprintf(stderr,
   5136 						"%s%s:  Illegal -U arg (%s).  Must be one of: ",
   5137 						Prog, TagName, tok);
   5138 
   5139 					for (s = Upanic_Args; s->string != NULL;
   5140 					     s++)
   5141 						fprintf(stderr, "%s ",
   5142 							s->string);
   5143 
   5144 					fprintf(stderr, "\n");
   5145 
   5146 					exit(1);
   5147 				}
   5148 
   5149 				Upanic_Conditions |= s->value;
   5150 				tok = strtok(NULL, ",");
   5151 			}
   5152 
   5153 			U_opt++;
   5154 			break;
   5155 
   5156 		case '?':
   5157 			usage(stderr);
   5158 			exit(E_USAGE);
   5159 			break;
   5160 		}
   5161 	}
   5162 
   5163 	/*
   5164 	 * Supply defaults
   5165 	 */
   5166 
   5167 	if (!C_opt) {
   5168 		Data_Fill = doio_pat_fill;
   5169 		Data_Check = doio_pat_check;
   5170 	}
   5171 
   5172 	if (!U_opt)
   5173 		Upanic_Conditions = 0;
   5174 
   5175 	if (!n_opt)
   5176 		Nprocs = 1;
   5177 
   5178 	if (!r_opt)
   5179 		Release_Interval = DEF_RELEASE_INTERVAL;
   5180 
   5181 	if (!M_opt) {
   5182 		Memalloc[Nmemalloc].memtype = MEM_DATA;
   5183 		Memalloc[Nmemalloc].flags = 0;
   5184 		Memalloc[Nmemalloc].name = NULL;
   5185 		Memalloc[Nmemalloc].space = NULL;
   5186 		Nmemalloc++;
   5187 	}
   5188 
   5189 	/*
   5190 	 * Initialize input stream
   5191 	 */
   5192 
   5193 	if (argc == optind) {
   5194 		Infile = NULL;
   5195 	} else {
   5196 		Infile = argv[optind++];
   5197 	}
   5198 
   5199 	if (argc != optind) {
   5200 		usage(stderr);
   5201 		exit(E_USAGE);
   5202 	}
   5203 
   5204 	return 0;
   5205 }
   5206 
   5207 /*
   5208  * Parse memory allocation types
   5209  *
   5210  * Types are:
   5211  *  Data
   5212  *  T3E-shmem:blksize[:nblks]
   5213  *  SysV-shmem:shmid:blksize:nblks
   5214  *	if shmid is "private", use IPC_PRIVATE
   5215  *	and nblks is not required
   5216  *
   5217  *  mmap:flags:filename:blksize[:nblks]
   5218  *   flags are one of:
   5219  *	p - private (MAP_PRIVATE)
   5220  *	a - private, MAP_AUTORESRV
   5221  *	l - local (MAP_LOCAL)
   5222  *	s - shared (nblks required)
   5223  *
   5224  *   plus any of:
   5225  *	f - fixed address (MAP_FIXED)
   5226  *	A - use an address without MAP_FIXED
   5227  *	G - autogrow (map once at startup)
   5228  *
   5229  *  mmap:flags:devzero
   5230  *	mmap /dev/zero  (shared not allowed)
   5231  *	maps the first 4096 bytes of /dev/zero
   5232  *
   5233  * - put a directory at the beginning of the shared
   5234  *   regions saying what pid has what region.
   5235  *	DIRMAGIC
   5236  *	BLKSIZE
   5237  *	NBLKS
   5238  *	nblks worth of directories - 1 int pids
   5239  */
   5240 #ifndef CRAY
   5241 void parse_memalloc(char *arg)
   5242 {
   5243 	char *allocargs[NMEMALLOC];
   5244 	int nalloc;
   5245 	struct memalloc *M;
   5246 
   5247 	if (Nmemalloc >= NMEMALLOC) {
   5248 		doio_fprintf(stderr, "Error - too many memory types (%d).\n",
   5249 			     Nmemalloc);
   5250 		return;
   5251 	}
   5252 
   5253 	M = &Memalloc[Nmemalloc];
   5254 
   5255 	nalloc = string_to_tokens(arg, allocargs, 32, ":");
   5256 	if (!strcmp(allocargs[0], "data")) {
   5257 		M->memtype = MEM_DATA;
   5258 		M->flags = 0;
   5259 		M->name = NULL;
   5260 		M->space = NULL;
   5261 		Nmemalloc++;
   5262 		if (nalloc >= 2) {
   5263 			if (strchr(allocargs[1], 'p'))
   5264 				M->flags |= MEMF_MPIN;
   5265 		}
   5266 	} else if (!strcmp(allocargs[0], "mmap")) {
   5267 		/* mmap:flags:filename[:size] */
   5268 		M->memtype = MEM_MMAP;
   5269 		M->flags = 0;
   5270 		M->space = NULL;
   5271 		if (nalloc >= 1) {
   5272 			if (strchr(allocargs[1], 'p'))
   5273 				M->flags |= MEMF_PRIVATE;
   5274 			if (strchr(allocargs[1], 'a'))
   5275 				M->flags |= MEMF_AUTORESRV;
   5276 			if (strchr(allocargs[1], 'l'))
   5277 				M->flags |= MEMF_LOCAL;
   5278 			if (strchr(allocargs[1], 's'))
   5279 				M->flags |= MEMF_SHARED;
   5280 
   5281 			if (strchr(allocargs[1], 'f'))
   5282 				M->flags |= MEMF_FIXADDR;
   5283 			if (strchr(allocargs[1], 'A'))
   5284 				M->flags |= MEMF_ADDR;
   5285 			if (strchr(allocargs[1], 'G'))
   5286 				M->flags |= MEMF_AUTOGROW;
   5287 
   5288 			if (strchr(allocargs[1], 'U'))
   5289 				M->flags |= MEMF_FILE;
   5290 		} else {
   5291 			M->flags |= MEMF_PRIVATE;
   5292 		}
   5293 
   5294 		if (nalloc > 2) {
   5295 			if (!strcmp(allocargs[2], "devzero")) {
   5296 				M->name = "/dev/zero";
   5297 				if (M->flags &
   5298 				    ((MEMF_PRIVATE | MEMF_LOCAL) == 0))
   5299 					M->flags |= MEMF_PRIVATE;
   5300 			} else {
   5301 				M->name = allocargs[2];
   5302 			}
   5303 		} else {
   5304 			M->name = "/dev/zero";
   5305 			if (M->flags & ((MEMF_PRIVATE | MEMF_LOCAL) == 0))
   5306 				M->flags |= MEMF_PRIVATE;
   5307 		}
   5308 		Nmemalloc++;
   5309 
   5310 	} else if (!strcmp(allocargs[0], "shmem")) {
   5311 		/* shmem:shmid:size */
   5312 		M->memtype = MEM_SHMEM;
   5313 		M->flags = 0;
   5314 		M->space = NULL;
   5315 		if (nalloc >= 2) {
   5316 			M->name = allocargs[1];
   5317 		} else {
   5318 			M->name = NULL;
   5319 		}
   5320 		if (nalloc >= 3) {
   5321 			sscanf(allocargs[2], "%i", &M->nblks);
   5322 		} else {
   5323 			M->nblks = 0;
   5324 		}
   5325 		if (nalloc >= 4) {
   5326 			if (strchr(allocargs[3], 'p'))
   5327 				M->flags |= MEMF_MPIN;
   5328 		}
   5329 
   5330 		Nmemalloc++;
   5331 	} else {
   5332 		doio_fprintf(stderr, "Error - unknown memory type '%s'.\n",
   5333 			     allocargs[0]);
   5334 		exit(1);
   5335 	}
   5336 }
   5337 
   5338 void dump_memalloc(void)
   5339 {
   5340 	int ma;
   5341 	char *mt;
   5342 
   5343 	if (Nmemalloc == 0) {
   5344 		printf("No memory allocation strategies devined\n");
   5345 		return;
   5346 	}
   5347 
   5348 	for (ma = 0; ma < Nmemalloc; ma++) {
   5349 		switch (Memalloc[ma].memtype) {
   5350 		case MEM_DATA:
   5351 			mt = "data";
   5352 			break;
   5353 		case MEM_SHMEM:
   5354 			mt = "shmem";
   5355 			break;
   5356 		case MEM_MMAP:
   5357 			mt = "mmap";
   5358 			break;
   5359 		default:
   5360 			mt = "unknown";
   5361 			break;
   5362 		}
   5363 		printf("mstrat[%d] = %d %s\n", ma, Memalloc[ma].memtype, mt);
   5364 		printf("\tflags=%#o name='%s' nblks=%d\n",
   5365 		       Memalloc[ma].flags,
   5366 		       Memalloc[ma].name, Memalloc[ma].nblks);
   5367 	}
   5368 }
   5369 
   5370 #endif /* !CRAY */
   5371 
   5372 /*
   5373  * -d <op>:<time> - doio inter-operation delay
   5374  *	currently this permits ONE type of delay between operations.
   5375  */
   5376 
   5377 void parse_delay(char *arg)
   5378 {
   5379 	char *delayargs[NMEMALLOC];
   5380 	int ndelay;
   5381 	struct smap *s;
   5382 
   5383 	ndelay = string_to_tokens(arg, delayargs, 32, ":");
   5384 	if (ndelay < 2) {
   5385 		doio_fprintf(stderr,
   5386 			     "Illegal delay arg (%s). Must be operation:time\n",
   5387 			     arg);
   5388 		exit(1);
   5389 	}
   5390 	for (s = delaymap; s->string != NULL; s++)
   5391 		if (!strcmp(s->string, delayargs[0]))
   5392 			break;
   5393 	if (s->string == NULL) {
   5394 		fprintf(stderr,
   5395 			"Illegal Delay arg (%s).  Must be one of: ", arg);
   5396 
   5397 		for (s = delaymap; s->string != NULL; s++)
   5398 			fprintf(stderr, "%s ", s->string);
   5399 		fprintf(stderr, "\n");
   5400 		exit(1);
   5401 	}
   5402 
   5403 	delayop = s->value;
   5404 
   5405 	sscanf(delayargs[1], "%i", &delaytime);
   5406 
   5407 	if (ndelay > 2) {
   5408 		fprintf(stderr, "Warning: extra delay arguments ignored.\n");
   5409 	}
   5410 }
   5411 
   5412 /*
   5413  * Usage clause - obvious
   5414  */
   5415 
   5416 int usage(FILE * stream)
   5417 {
   5418 	/*
   5419 	 * Only do this if we are on vpe 0, to avoid seeing it from every
   5420 	 * process in the application.
   5421 	 */
   5422 
   5423 	if (Npes > 1 && Vpe != 0) {
   5424 		return 0;
   5425 	}
   5426 
   5427 	fprintf(stream,
   5428 		"usage%s:  %s [-aekv] [-m message_interval] [-n nprocs] [-r release_interval] [-w write_log] [-V validation_ftype] [-U upanic_cond] [infile]\n",
   5429 		TagName, Prog);
   5430 	return 0;
   5431 }
   5432 
   5433 void help(FILE * stream)
   5434 {
   5435 	/*
   5436 	 * Only the app running on vpe 0 gets to issue help - this prevents
   5437 	 * everybody in the application from doing this.
   5438 	 */
   5439 
   5440 	if (Npes > 1 && Vpe != 0) {
   5441 		return;
   5442 	}
   5443 
   5444 	usage(stream);
   5445 	fprintf(stream, "\n");
   5446 	fprintf(stream,
   5447 		"\t-a                   abort - kill all doio processes on data compare\n");
   5448 	fprintf(stream,
   5449 		"\t                     errors.  Normally only the erroring process exits\n");
   5450 	fprintf(stream, "\t-C data-pattern-type \n");
   5451 	fprintf(stream,
   5452 		"\t                     Available data patterns are:\n");
   5453 	fprintf(stream, "\t                     default - repeating pattern\n");
   5454 	fprintf(stream, "\t-d Operation:Time    Inter-operation delay.\n");
   5455 	fprintf(stream, "\t                     Operations are:\n");
   5456 	fprintf(stream,
   5457 		"\t                         select:time (1 second=1000000)\n");
   5458 	fprintf(stream, "\t                         sleep:time (1 second=1)\n");
   5459 #ifdef sgi
   5460 	fprintf(stream,
   5461 		"\t                         sginap:time (1 second=CLK_TCK=100)\n");
   5462 #endif
   5463 	fprintf(stream, "\t                         alarm:time (1 second=1)\n");
   5464 	fprintf(stream,
   5465 		"\t-e                   Re-exec children before entering the main\n");
   5466 	fprintf(stream,
   5467 		"\t                     loop.  This is useful for spreading\n");
   5468 	fprintf(stream,
   5469 		"\t                     procs around on multi-pe systems.\n");
   5470 	fprintf(stream,
   5471 		"\t-k                   Lock file regions during writes using fcntl()\n");
   5472 	fprintf(stream,
   5473 		"\t-v                   Verify writes - this is done by doing a buffered\n");
   5474 	fprintf(stream,
   5475 		"\t                     read() of the data if file io was done, or\n");
   5476 	fprintf(stream,
   5477 		"\t                     an ssread()of the data if sds io was done\n");
   5478 #ifndef CRAY
   5479 	fprintf(stream,
   5480 		"\t-M                   Data buffer allocation method\n");
   5481 	fprintf(stream, "\t                     alloc-type[,type]\n");
   5482 #ifdef sgi
   5483 	fprintf(stream, "\t			    data:flags\n");
   5484 	fprintf(stream, "\t			        p - mpin buffer\n");
   5485 	fprintf(stream, "\t			    shmem:shmid:size:flags\n");
   5486 	fprintf(stream, "\t			        p - mpin buffer\n");
   5487 #else
   5488 	fprintf(stream, "\t			    data\n");
   5489 	fprintf(stream, "\t			    shmem:shmid:size\n");
   5490 #endif /* sgi */
   5491 	fprintf(stream, "\t			    mmap:flags:filename\n");
   5492 	fprintf(stream, "\t			        p - private\n");
   5493 #ifdef sgi
   5494 	fprintf(stream, "\t			        s - shared\n");
   5495 	fprintf(stream, "\t			        l - local\n");
   5496 	fprintf(stream, "\t			        a - autoresrv\n");
   5497 	fprintf(stream, "\t			        G - autogrow\n");
   5498 #else
   5499 	fprintf(stream,
   5500 		"\t			        s - shared (shared file must exist\n"),
   5501 	    fprintf(stream,
   5502 		    "\t			            and have needed length)\n");
   5503 #endif
   5504 	fprintf(stream,
   5505 		"\t			        f - fixed address (not used)\n");
   5506 	fprintf(stream,
   5507 		"\t			        a - specify address (not used)\n");
   5508 	fprintf(stream,
   5509 		"\t			        U - Unlink file when done\n");
   5510 	fprintf(stream,
   5511 		"\t			        The default flag is private\n");
   5512 	fprintf(stream, "\n");
   5513 #endif /* !CRAY */
   5514 	fprintf(stream,
   5515 		"\t-m message_interval  Generate a message every 'message_interval'\n");
   5516 	fprintf(stream,
   5517 		"\t                     requests.  An interval of 0 suppresses\n");
   5518 	fprintf(stream,
   5519 		"\t                     messages.  The default is 0.\n");
   5520 	fprintf(stream, "\t-N tagname           Tag name, for Monster.\n");
   5521 	fprintf(stream, "\t-n nprocs            # of processes to start up\n");
   5522 	fprintf(stream,
   5523 		"\t-r release_interval  Release all memory and close\n");
   5524 	fprintf(stream,
   5525 		"\t                     files every release_interval operations.\n");
   5526 	fprintf(stream,
   5527 		"\t                     By default procs never release memory\n");
   5528 	fprintf(stream,
   5529 		"\t                     or close fds unless they have to.\n");
   5530 	fprintf(stream,
   5531 		"\t-V validation_ftype  The type of file descriptor to use for doing data\n");
   5532 	fprintf(stream,
   5533 		"\t                     validation.  validation_ftype may be an octal,\n");
   5534 	fprintf(stream,
   5535 		"\t                     hex, or decimal number representing the open()\n");
   5536 	fprintf(stream,
   5537 		"\t                     flags, or may be one of the following strings:\n");
   5538 	fprintf(stream,
   5539 		"\t                     'buffered' - validate using bufferd read\n");
   5540 	fprintf(stream,
   5541 		"\t                     'sync'     - validate using O_SYNC read\n");
   5542 #ifdef sgi
   5543 	fprintf(stream,
   5544 		"\t                     'direct    - validate using O_DIRECT read'\n");
   5545 #endif
   5546 #ifdef CRAY
   5547 	fprintf(stream,
   5548 		"\t                     'ldraw'    - validate using O_LDRAW read\n");
   5549 	fprintf(stream,
   5550 		"\t                     'parallel' - validate using O_PARALLEL read\n");
   5551 	fprintf(stream,
   5552 		"\t                     'raw'      - validate using O_RAW read\n");
   5553 #endif
   5554 	fprintf(stream, "\t                     By default, 'parallel'\n");
   5555 	fprintf(stream,
   5556 		"\t                     is used if the write was done with O_PARALLEL\n");
   5557 	fprintf(stream,
   5558 		"\t                     or 'buffered' for all other writes.\n");
   5559 	fprintf(stream,
   5560 		"\t-w write_log         File to log file writes to.  The doio_check\n");
   5561 	fprintf(stream,
   5562 		"\t                     program can reconstruct datafiles using the\n");
   5563 	fprintf(stream,
   5564 		"\t                     write_log, and detect if a file is corrupt\n");
   5565 	fprintf(stream,
   5566 		"\t                     after all procs have exited.\n");
   5567 	fprintf(stream,
   5568 		"\t-U upanic_cond       Comma separated list of conditions that will\n");
   5569 	fprintf(stream,
   5570 		"\t                     cause a call to upanic(PA_PANIC).\n");
   5571 	fprintf(stream,
   5572 		"\t                     'corruption' -> upanic on bad data comparisons\n");
   5573 	fprintf(stream,
   5574 		"\t                     'iosw'     ---> upanic on unexpected async iosw\n");
   5575 	fprintf(stream,
   5576 		"\t                     'rval'     ---> upanic on unexpected syscall rvals\n");
   5577 	fprintf(stream,
   5578 		"\t                     'all'      ---> all of the above\n");
   5579 	fprintf(stream, "\n");
   5580 	fprintf(stream,
   5581 		"\tinfile               Input stream - default is stdin - must be a list\n");
   5582 	fprintf(stream,
   5583 		"\t                     of io_req structures (see doio.h).  Currently\n");
   5584 	fprintf(stream,
   5585 		"\t                     only the iogen program generates the proper\n");
   5586 	fprintf(stream, "\t                     format\n");
   5587 }
   5588