      1 /*
      2  * block queue tracing application
      3  *
      4  * Copyright (C) 2005 Jens Axboe <axboe (at) suse.de>
      5  * Copyright (C) 2006 Jens Axboe <axboe (at) kernel.dk>
      6  *
      7  * Rewrite to have a single thread per CPU (managing all devices on that CPU)
      8  *	Alan D. Brunelle <alan.brunelle (at) hp.com> - January 2009
      9  *
     10  *  This program is free software; you can redistribute it and/or modify
     11  *  it under the terms of the GNU General Public License as published by
     12  *  the Free Software Foundation; either version 2 of the License, or
     13  *  (at your option) any later version.
     14  *
     15  *  This program is distributed in the hope that it will be useful,
     16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     18  *  GNU General Public License for more details.
     19  *
     20  *  You should have received a copy of the GNU General Public License
     21  *  along with this program; if not, write to the Free Software
     22  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     23  *
     24  */
     25 
     26 #include <errno.h>
     27 #include <stdarg.h>
     28 #include <stdio.h>
     29 #include <stdlib.h>
     30 #include <string.h>
     31 #include <fcntl.h>
     32 #include <getopt.h>
     33 #include <sched.h>
     34 #include <unistd.h>
     35 #include <poll.h>
     36 #include <signal.h>
     37 #include <pthread.h>
     38 #include <locale.h>
     39 #include <sys/ioctl.h>
     40 #include <sys/types.h>
     41 #include <sys/stat.h>
     42 #include <sys/vfs.h>
     43 #include <sys/mman.h>
     44 #include <sys/param.h>
     45 #include <sys/time.h>
     46 #include <sys/resource.h>
     47 #include <sys/socket.h>
     48 #include <netinet/in.h>
     49 #include <arpa/inet.h>
     50 #include <netdb.h>
     51 #include <sys/sendfile.h>
     52 
     53 #include "btt/list.h"
     54 #include "blktrace.h"
     55 
     56 /*
     57  * You may want to increase this even more, if you are logging at a high
     58  * rate and see skipped/missed events
     59  */
     60 #define BUF_SIZE		(512 * 1024)
     61 #define BUF_NR			(4)
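/*
 * e.g., run blktrace with "-b 1024 -n 8" (1 MiB sub-buffers, eight of
 * them) if the defaults still show dropped events.
 */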
     62 
     63 #define FILE_VBUF_SIZE		(128 * 1024)
     64 
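/*
 * DEBUGFS_TYPE is the kernel's DEBUGFS_MAGIC (ASCII "dbg "); it is used
 * (via statfs(2), presumably later in this file) to verify that the
 * debugfs path really is a debugfs mount.
 */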
     65 #define DEBUGFS_TYPE		(0x64626720)
     66 #define TRACE_NET_PORT		(8462)
     67 
     68 enum {
     69 	Net_none = 0,
     70 	Net_server,
     71 	Net_client,
     72 };
     73 
     74 enum thread_status {
     75 	Th_running,
     76 	Th_leaving,
     77 	Th_error
     78 };
     79 
/*
 * Generic stats collected: nevents can be _roughly_ estimated from
 * data_read (discounting PDU data).
 *
 * These fields are updated via pdc_dr_update() and pdc_nev_update() below.
 */
     86 struct pdc_stats {
     87 	unsigned long long data_read;
     88 	unsigned long long nevents;
     89 };
     90 
     91 struct devpath {
     92 	struct list_head head;
     93 	char *path;			/* path to device special file */
     94 	char *buts_name;		/* name returned from bt kernel code */
     95 	struct pdc_stats *stats;
     96 	int fd, ncpus;
     97 	unsigned long long drops;
     98 
	/*
	 * For piped output only:
	 *
	 * Each tracer will have a tracer_devpath_head that it will add new
	 * data onto. Its list is protected by tracer_devpath_head.mutex,
	 * and the tracer signals the processing thread using the dp_cond,
	 * dp_mutex and dp_entries variables declared below.
	 */
    107 	struct tracer_devpath_head *heads;
    108 
    109 	/*
    110 	 * For network server mode only:
    111 	 */
    112 	struct cl_host *ch;
    113 	u32 cl_id;
    114 	time_t cl_connect_time;
    115 	struct io_info *ios;
    116 };
    117 
/*
 * For piped output to stdout we will have each tracer thread (one per dev)
 * tack buffers read from the relay queues onto a per-device list.
 *
 * The main thread will then collect trace buffers from each of the lists
 * in turn.
 *
 * A mutex guards each trace_buf list. The tracers signal the main thread
 * via <dp_cond,dp_mutex> and dp_entries: when a tracer adds an entry while
 * dp_entries is 0, it signals; while dp_entries remains 0, the main thread
 * waits for that condition to be signalled.
 *
 * adb: It may be better just to have a large buffer per tracer per dev,
 * and then use it as a ring buffer. This would certainly cut down a lot
 * of malloc/free thrashing, at the cost of (potentially) more memory
 * movement.
 */
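/*
 * In outline (see incr_entries() and wait_empty_entries() below):
 *
 *	tracer thread:			main thread:
 *	  lock(dp_mutex)		  lock(dp_mutex)
 *	  if (dp_entries == 0)		  while (!done && dp_entries == 0)
 *		signal(dp_cond)			wait(dp_cond, dp_mutex)
 *	  dp_entries += n		  unlock(dp_mutex)
 *	  unlock(dp_mutex)
 */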
    134 struct trace_buf {
    135 	struct list_head head;
    136 	struct devpath *dpp;
    137 	void *buf;
    138 	int cpu, len;
    139 };
    140 
    141 struct tracer_devpath_head {
    142 	pthread_mutex_t mutex;
    143 	struct list_head head;
    144 	struct trace_buf *prev;
    145 };
    146 
    147 /*
    148  * Used to handle the mmap() interfaces for output file (containing traces)
    149  */
    150 struct mmap_info {
    151 	void *fs_buf;
    152 	unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len;
    153 	unsigned long buf_size, buf_nr;
    154 	int pagesize;
    155 };
    156 
    157 /*
    158  * Each thread doing work on a (client) side of blktrace will have one
    159  * of these. The ios array contains input/output information, pfds holds
    160  * poll() data. The volatile's provide flags to/from the main executing
    161  * thread.
    162  */
    163 struct tracer {
    164 	struct list_head head;
    165 	struct io_info *ios;
    166 	struct pollfd *pfds;
    167 	pthread_t thread;
    168 	int cpu, nios;
    169 	volatile int status, is_done;
    170 };
    171 
/*
 * Networking code follows. We include a magic number so we know whether
 * we need to convert endianness or not.
 *
 * The len field is overloaded:
 *	0 - Indicates an "open" - allowing the server to set up for a dev/cpu
 *	1 - Indicates a "close" - shutting the connection down in an orderly
 *	    fashion
 *	2 - Used by the server to acknowledge an open or close
 *
 * The cpu field is overloaded on close: it will contain the number of drops.
 */
    182 struct blktrace_net_hdr {
    183 	u32 magic;		/* same as trace magic */
    184 	char buts_name[32];	/* trace name */
    185 	u32 cpu;		/* for which cpu */
    186 	u32 max_cpus;
    187 	u32 len;		/* length of following trace data */
    188 	u32 cl_id;		/* id for set of client per-cpu connections */
    189 	u32 buf_size;		/* client buf_size for this trace  */
    190 	u32 buf_nr;		/* client buf_nr for this trace  */
    191 	u32 page_size;		/* client page_size for this trace  */
    192 };
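/*
 * Example exchange for one client connection (see net_send_open(),
 * net_send_close() and ack_open_close() below): the client opens with a
 * len == 0 header, then sends a header with len == <payload size> before
 * each chunk of trace data, and closes with a len == 1 header whose cpu
 * field carries the drop count; the server acks opens and closes with a
 * len == 2 header.
 */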
    193 
/*
 * Each host encountered has one of these. The head is used to link this
 * on to the network server's ch_list. Connections associated with this
 * host are linked on conn_list, and any devices traced on that host
 * are linked on the devpaths list.
 */
    200 struct cl_host {
    201 	struct list_head head;
    202 	struct list_head conn_list;
    203 	struct list_head devpaths;
    204 	struct net_server_s *ns;
    205 	char *hostname;
    206 	struct in_addr cl_in_addr;
    207 	int connects, ndevs, cl_opens;
    208 };
    209 
/*
 * Each connection (client to server socket ('fd')) has one of these. A
 * back reference to the host ('ch') and list heads (for the host's
 * conn_list and the network server's conn_list) are also included.
 */
    215 struct cl_conn {
    216 	struct list_head ch_head, ns_head;
    217 	struct cl_host *ch;
    218 	int fd, ncpus;
    219 	time_t connect_time;
    220 };
    221 
/*
 * The network server requires some poll structures to be maintained -
 * one per connection currently on conn_list. The nchs/ch_list values
 * are for each host connected to this server. The addr field is used
 * as scratch space as new connections are established.
 */
    228 struct net_server_s {
    229 	struct list_head conn_list;
    230 	struct list_head ch_list;
    231 	struct pollfd *pfds;
    232 	int listen_fd, connects, nchs;
    233 	struct sockaddr_in addr;
    234 };
    235 
/*
 * This structure is (generically) used to provide information
 * for a read-to-write set of values.
 *
 * ifn & ifd represent input information
 *
 * ofn, ofd, ofp, obuf & mmap_info are used for the (optional) output file.
 */
    244 struct io_info {
    245 	struct devpath *dpp;
    246 	FILE *ofp;
    247 	char *obuf;
    248 	struct cl_conn *nc;	/* Server network connection */
    249 
    250 	/*
    251 	 * mmap controlled output files
    252 	 */
    253 	struct mmap_info mmap_info;
    254 
    255 	/*
    256 	 * Client network fields
    257 	 */
    258 	unsigned int ready;
    259 	unsigned long long data_queued;
    260 
    261 	/*
    262 	 * Input/output file descriptors & names
    263 	 */
    264 	int ifd, ofd;
    265 	char ifn[MAXPATHLEN + 64];
    266 	char ofn[MAXPATHLEN + 64];
    267 };
    268 
    269 static char blktrace_version[] = "2.0.0";
    270 
    271 /*
    272  * Linkage to blktrace helper routines (trace conversions)
    273  */
    274 int data_is_native = -1;
    275 
    276 static int ndevs;
    277 static int max_cpus;
    278 static int ncpus;
    279 static cpu_set_t *online_cpus;
    280 static int pagesize;
    281 static int act_mask = ~0U;
    282 static int kill_running_trace;
    283 static int stop_watch;
    284 static int piped_output;
    285 
    286 static char *debugfs_path = "/sys/kernel/debug";
    287 static char *output_name;
    288 static char *output_dir;
    289 
    290 static unsigned long buf_size = BUF_SIZE;
    291 static unsigned long buf_nr = BUF_NR;
    292 
    293 static FILE *pfp;
    294 
    295 static LIST_HEAD(devpaths);
    296 static LIST_HEAD(tracers);
    297 
    298 static volatile int done;
    299 
/*
 * Tracer threads add entries; the main thread takes them off and processes
 * them. The following protect the dp_entries variable.
 */
    304 static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
    305 static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
    306 static volatile int dp_entries;
    307 
    308 /*
    309  * These synchronize master / thread interactions.
    310  */
    311 static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
    312 static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
    313 static volatile int nthreads_running;
    314 static volatile int nthreads_leaving;
    315 static volatile int nthreads_error;
    316 static volatile int tracers_run;
    317 
    318 /*
    319  * network cmd line params
    320  */
    321 static struct sockaddr_in hostname_addr;
    322 static char hostname[MAXHOSTNAMELEN];
    323 static int net_port = TRACE_NET_PORT;
    324 static int net_use_sendfile = 1;
    325 static int net_mode;
    326 static int *cl_fds;
    327 
    328 static int (*handle_pfds)(struct tracer *, int, int);
    329 static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);
    330 
    331 #define S_OPTS	"d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
    332 static struct option l_opts[] = {
    333 	{
    334 		.name = "dev",
    335 		.has_arg = required_argument,
    336 		.flag = NULL,
    337 		.val = 'd'
    338 	},
    339 	{
    340 		.name = "input-devs",
    341 		.has_arg = required_argument,
    342 		.flag = NULL,
    343 		.val = 'I'
    344 	},
    345 	{
    346 		.name = "act-mask",
    347 		.has_arg = required_argument,
    348 		.flag = NULL,
    349 		.val = 'a'
    350 	},
    351 	{
    352 		.name = "set-mask",
    353 		.has_arg = required_argument,
    354 		.flag = NULL,
    355 		.val = 'A'
    356 	},
    357 	{
    358 		.name = "relay",
    359 		.has_arg = required_argument,
    360 		.flag = NULL,
    361 		.val = 'r'
    362 	},
    363 	{
    364 		.name = "output",
    365 		.has_arg = required_argument,
    366 		.flag = NULL,
    367 		.val = 'o'
    368 	},
    369 	{
    370 		.name = "kill",
    371 		.has_arg = no_argument,
    372 		.flag = NULL,
    373 		.val = 'k'
    374 	},
    375 	{
    376 		.name = "stopwatch",
    377 		.has_arg = required_argument,
    378 		.flag = NULL,
    379 		.val = 'w'
    380 	},
    381 	{
    382 		.name = "version",
    383 		.has_arg = no_argument,
    384 		.flag = NULL,
    385 		.val = 'v'
    386 	},
    387 	{
    388 		.name = "version",
    389 		.has_arg = no_argument,
    390 		.flag = NULL,
    391 		.val = 'V'
    392 	},
    393 	{
    394 		.name = "buffer-size",
    395 		.has_arg = required_argument,
    396 		.flag = NULL,
    397 		.val = 'b'
    398 	},
    399 	{
    400 		.name = "num-sub-buffers",
    401 		.has_arg = required_argument,
    402 		.flag = NULL,
    403 		.val = 'n'
    404 	},
    405 	{
    406 		.name = "output-dir",
    407 		.has_arg = required_argument,
    408 		.flag = NULL,
    409 		.val = 'D'
    410 	},
    411 	{
    412 		.name = "listen",
    413 		.has_arg = no_argument,
    414 		.flag = NULL,
    415 		.val = 'l'
    416 	},
    417 	{
    418 		.name = "host",
    419 		.has_arg = required_argument,
    420 		.flag = NULL,
    421 		.val = 'h'
    422 	},
    423 	{
    424 		.name = "port",
    425 		.has_arg = required_argument,
    426 		.flag = NULL,
    427 		.val = 'p'
    428 	},
    429 	{
    430 		.name = "no-sendfile",
    431 		.has_arg = no_argument,
    432 		.flag = NULL,
    433 		.val = 's'
    434 	},
    435 	{
    436 		.name = NULL,
    437 	}
    438 };
    439 
static char usage_str[] = "\n\n" \
	"-d <dev>             | --dev=<dev>\n" \
	"[ -r <debugfs path>  | --relay=<debugfs path> ]\n" \
	"[ -o <file>          | --output=<file>]\n" \
	"[ -D <dir>           | --output-dir=<dir>]\n" \
	"[ -w <time>          | --stopwatch=<time>]\n" \
	"[ -a <action field>  | --act-mask=<action field>]\n" \
	"[ -A <action mask>   | --set-mask=<action mask>]\n" \
	"[ -b <size>          | --buffer-size=<size>]\n" \
	"[ -n <number>        | --num-sub-buffers=<number>]\n" \
	"[ -l                 | --listen]\n" \
	"[ -h <hostname>      | --host=<hostname>]\n" \
	"[ -p <port number>   | --port=<port number>]\n" \
	"[ -s                 | --no-sendfile]\n" \
	"[ -I <devs file>     | --input-devs=<devs file>]\n" \
	"[ -v                 | --version]\n" \
	"[ -V                 | --version]\n" \
    457 
    458 	"\t-d Use specified device. May also be given last after options\n" \
    459 	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
    460 	"\t-o File(s) to send output to\n" \
    461 	"\t-D Directory to prepend to output file names\n" \
    462 	"\t-w Stop after defined time, in seconds\n" \
    463 	"\t-a Only trace specified actions. See documentation\n" \
    464 	"\t-A Give trace mask as a single value. See documentation\n" \
    465 	"\t-b Sub buffer size in KiB (default 512)\n" \
    466 	"\t-n Number of sub buffers (default 4)\n" \
    467 	"\t-l Run in network listen mode (blktrace server)\n" \
    468 	"\t-h Run in network client mode, connecting to the given host\n" \
    469 	"\t-p Network port to use (default 8462)\n" \
    470 	"\t-s Make the network client NOT use sendfile() to transfer data\n" \
    471 	"\t-I Add devices found in <devs file>\n" \
    472 	"\t-v Print program version info\n" \
    473 	"\t-V Print program version info\n\n";
    474 
    475 static void clear_events(struct pollfd *pfd)
    476 {
    477 	pfd->events = 0;
    478 	pfd->revents = 0;
    479 }
    480 
    481 static inline int net_client_use_sendfile(void)
    482 {
    483 	return net_mode == Net_client && net_use_sendfile;
    484 }
    485 
    486 static inline int net_client_use_send(void)
    487 {
    488 	return net_mode == Net_client && !net_use_sendfile;
    489 }
    490 
    491 static inline int use_tracer_devpaths(void)
    492 {
    493 	return piped_output || net_client_use_send();
    494 }
    495 
    496 static inline int in_addr_eq(struct in_addr a, struct in_addr b)
    497 {
    498 	return a.s_addr == b.s_addr;
    499 }
    500 
    501 static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
    502 {
    503 	dpp->stats[cpu].data_read += data_read;
    504 }
    505 
    506 static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
    507 {
    508 	dpp->stats[cpu].nevents += nevents;
    509 }
    510 
    511 static void show_usage(char *prog)
    512 {
    513 	fprintf(stderr, "Usage: %s %s", prog, usage_str);
    514 }
    515 
/*
 * Create a timespec 'delta_msec' milliseconds into the future
 */
    519 static inline void make_timespec(struct timespec *tsp, long delta_msec)
    520 {
    521 	struct timeval now;
    522 
    523 	gettimeofday(&now, NULL);
    524 	tsp->tv_sec = now.tv_sec;
    525 	tsp->tv_nsec = 1000L * now.tv_usec;
    526 
    527 	tsp->tv_nsec += (delta_msec * 1000000L);
	if (tsp->tv_nsec >= 1000000000L) {
    529 		long secs = tsp->tv_nsec / 1000000000L;
    530 
    531 		tsp->tv_sec += secs;
    532 		tsp->tv_nsec -= (secs * 1000000000L);
    533 	}
    534 }
    535 
    536 /*
    537  * Add a timer to ensure wait ends
    538  */
    539 static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
    540 {
    541 	struct timespec ts;
    542 
    543 	make_timespec(&ts, 50);
    544 	pthread_cond_timedwait(cond, mutex, &ts);
    545 }
    546 
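/*
 * Called by the main thread once setup is complete, to release all tracer
 * threads blocked in tracer_wait_unblock().
 */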
    547 static void unblock_tracers(void)
    548 {
    549 	pthread_mutex_lock(&mt_mutex);
    550 	tracers_run = 1;
    551 	pthread_cond_broadcast(&mt_cond);
    552 	pthread_mutex_unlock(&mt_mutex);
    553 }
    554 
    555 static void tracer_wait_unblock(struct tracer *tp)
    556 {
    557 	pthread_mutex_lock(&mt_mutex);
    558 	while (!tp->is_done && !tracers_run)
    559 		pthread_cond_wait(&mt_cond, &mt_mutex);
    560 	pthread_mutex_unlock(&mt_mutex);
    561 }
    562 
    563 static void tracer_signal_ready(struct tracer *tp,
    564 				enum thread_status th_status,
    565 				int status)
    566 {
    567 	pthread_mutex_lock(&mt_mutex);
    568 	tp->status = status;
    569 
    570 	if (th_status == Th_running)
    571 		nthreads_running++;
    572 	else if (th_status == Th_error)
    573 		nthreads_error++;
    574 	else
    575 		nthreads_leaving++;
    576 
    577 	pthread_cond_signal(&mt_cond);
    578 	pthread_mutex_unlock(&mt_mutex);
    579 }
    580 
    581 static void wait_tracers_ready(int ncpus_started)
    582 {
    583 	pthread_mutex_lock(&mt_mutex);
    584 	while ((nthreads_running + nthreads_error) < ncpus_started)
    585 		t_pthread_cond_wait(&mt_cond, &mt_mutex);
    586 	pthread_mutex_unlock(&mt_mutex);
    587 }
    588 
    589 static void wait_tracers_leaving(void)
    590 {
    591 	pthread_mutex_lock(&mt_mutex);
    592 	while (nthreads_leaving < nthreads_running)
    593 		t_pthread_cond_wait(&mt_cond, &mt_mutex);
    594 	pthread_mutex_unlock(&mt_mutex);
    595 }
    596 
    597 static void init_mmap_info(struct mmap_info *mip)
    598 {
    599 	mip->buf_size = buf_size;
    600 	mip->buf_nr = buf_nr;
    601 	mip->pagesize = pagesize;
    602 }
    603 
    604 static void net_close_connection(int *fd)
    605 {
    606 	shutdown(*fd, SHUT_RDWR);
    607 	close(*fd);
    608 	*fd = -1;
    609 }
    610 
    611 static void dpp_free(struct devpath *dpp)
    612 {
    613 	if (dpp->stats)
    614 		free(dpp->stats);
    615 	if (dpp->ios)
    616 		free(dpp->ios);
    617 	if (dpp->path)
    618 		free(dpp->path);
    619 	if (dpp->buts_name)
    620 		free(dpp->buts_name);
    621 	free(dpp);
    622 }
    623 
    624 static int lock_on_cpu(int cpu)
    625 {
    626 	cpu_set_t * cpu_mask;
    627 	size_t size;
    628 
    629 	cpu_mask = CPU_ALLOC(max_cpus);
    630 	size = CPU_ALLOC_SIZE(max_cpus);
    631 
    632 	CPU_ZERO_S(size, cpu_mask);
    633 	CPU_SET_S(cpu, size, cpu_mask);
    634 	if (sched_setaffinity(0, size, cpu_mask) < 0) {
    635 		CPU_FREE(cpu_mask);
    636 		return errno;
    637 	}
    638 
    639 	CPU_FREE(cpu_mask);
    640 	return 0;
    641 }
    642 
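/*
 * Bump the soft (and, if needed, hard) limit for 'resource' by 'increase'.
 * Returns 1 if a new limit was set, 0 otherwise (with errno preserved).
 */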
    643 static int increase_limit(int resource, rlim_t increase)
    644 {
    645 	struct rlimit rlim;
    646 	int save_errno = errno;
    647 
    648 	if (!getrlimit(resource, &rlim)) {
    649 		rlim.rlim_cur += increase;
    650 		if (rlim.rlim_cur >= rlim.rlim_max)
    651 			rlim.rlim_max = rlim.rlim_cur + increase;
    652 
    653 		if (!setrlimit(resource, &rlim))
    654 			return 1;
    655 	}
    656 
    657 	errno = save_errno;
    658 	return 0;
    659 }
    660 
    661 static int handle_open_failure(void)
    662 {
    663 	if (errno == ENFILE || errno == EMFILE)
    664 		return increase_limit(RLIMIT_NOFILE, 16);
    665 	return 0;
    666 }
    667 
    668 static int handle_mem_failure(size_t length)
    669 {
    670 	if (errno == ENFILE)
    671 		return handle_open_failure();
    672 	else if (errno == ENOMEM)
    673 		return increase_limit(RLIMIT_MEMLOCK, 2 * length);
    674 	return 0;
    675 }
    676 
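/*
 * my_fopen()/my_open()/my_socket()/my_accept() are retry wrappers: on fd
 * exhaustion (ENFILE/EMFILE) they attempt to raise RLIMIT_NOFILE and
 * try again.
 */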
    677 static FILE *my_fopen(const char *path, const char *mode)
    678 {
    679 	FILE *fp;
    680 
    681 	do {
    682 		fp = fopen(path, mode);
    683 	} while (fp == NULL && handle_open_failure());
    684 
    685 	return fp;
    686 }
    687 
    688 static int my_open(const char *path, int flags)
    689 {
    690 	int fd;
    691 
    692 	do {
    693 		fd = open(path, flags);
    694 	} while (fd < 0 && handle_open_failure());
    695 
    696 	return fd;
    697 }
    698 
    699 static int my_socket(int domain, int type, int protocol)
    700 {
    701 	int fd;
    702 
    703 	do {
    704 		fd = socket(domain, type, protocol);
    705 	} while (fd < 0 && handle_open_failure());
    706 
    707 	return fd;
    708 }
    709 
    710 static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
    711 {
    712 	int fd;
    713 
    714 	do {
    715 		fd = accept(sockfd, addr, addrlen);
    716 	} while (fd < 0 && handle_open_failure());
    717 
    718 	return fd;
    719 }
    720 
    721 static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
    722 		     off_t offset)
    723 {
    724 	void *new;
    725 
    726 	do {
    727 		new = mmap(addr, length, prot, flags, fd, offset);
    728 	} while (new == MAP_FAILED && handle_mem_failure(length));
    729 
    730 	return new;
    731 }
    732 
    733 static int my_mlock(struct tracer *tp,
    734 		    const void *addr, size_t len)
    735 {
    736 	int ret, retry = 0;
    737 
    738 	do {
    739 		ret = mlock(addr, len);
    740 		if ((retry >= 10) && tp && tp->is_done)
    741 			break;
    742 		retry++;
    743 	} while (ret < 0 && handle_mem_failure(len));
    744 
    745 	return ret;
    746 }
    747 
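/*
 * (Re)establish a locked, mmap()ed output window covering at least
 * 'maxlen' more bytes. The file is grown ahead of the writes with
 * ftruncate(); the mapping must be page aligned, so fs_off carries the
 * sub-page offset of the current write position within the window.
 */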
    748 static int setup_mmap(int fd, unsigned int maxlen,
    749 		      struct mmap_info *mip,
    750 		      struct tracer *tp)
    751 {
    752 	if (mip->fs_off + maxlen > mip->fs_buf_len) {
    753 		unsigned long nr = max(16, mip->buf_nr);
    754 
    755 		if (mip->fs_buf) {
    756 			munlock(mip->fs_buf, mip->fs_buf_len);
    757 			munmap(mip->fs_buf, mip->fs_buf_len);
    758 			mip->fs_buf = NULL;
    759 		}
    760 
    761 		mip->fs_off = mip->fs_size & (mip->pagesize - 1);
    762 		mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
    763 		mip->fs_max_size += mip->fs_buf_len;
    764 
    765 		if (ftruncate(fd, mip->fs_max_size) < 0) {
    766 			perror("setup_mmap: ftruncate");
    767 			return 1;
    768 		}
    769 
    770 		mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
    771 				      MAP_SHARED, fd,
    772 				      mip->fs_size - mip->fs_off);
    773 		if (mip->fs_buf == MAP_FAILED) {
    774 			perror("setup_mmap: mmap");
    775 			return 1;
    776 		}
    777 		if (my_mlock(tp, mip->fs_buf, mip->fs_buf_len) < 0) {
			perror("setup_mmap: mlock");
    779 			return 1;
    780 		}
    781 	}
    782 
    783 	return 0;
    784 }
    785 
    786 static int __stop_trace(int fd)
    787 {
    788 	/*
    789 	 * Should be stopped, don't complain if it isn't
    790 	 */
    791 	ioctl(fd, BLKTRACESTOP);
    792 	return ioctl(fd, BLKTRACETEARDOWN);
    793 }
    794 
    795 static int write_data(char *buf, int len)
    796 {
    797 	int ret;
    798 
    799 rewrite:
    800 	ret = fwrite(buf, len, 1, pfp);
    801 	if (ferror(pfp) || ret != 1) {
    802 		if (errno == EINTR) {
    803 			clearerr(pfp);
    804 			goto rewrite;
    805 		}
    806 
    807 		if (!piped_output || (errno != EPIPE && errno != EBADF)) {
    808 			fprintf(stderr, "write(%d) failed: %d/%s\n",
    809 				len, errno, strerror(errno));
    810 		}
    811 		goto err;
    812 	}
    813 
    814 	fflush(pfp);
    815 	return 0;
    816 
    817 err:
    818 	clearerr(pfp);
    819 	return 1;
    820 }
    821 
    822 /*
    823  * Returns the number of bytes read (successfully)
    824  */
    825 static int __net_recv_data(int fd, void *buf, unsigned int len)
    826 {
    827 	unsigned int bytes_left = len;
    828 
    829 	while (bytes_left && !done) {
    830 		int ret = recv(fd, buf, bytes_left, MSG_WAITALL);
    831 
    832 		if (ret == 0)
    833 			break;
    834 		else if (ret < 0) {
    835 			if (errno == EAGAIN) {
    836 				usleep(50);
    837 				continue;
    838 			}
    839 			perror("server: net_recv_data: recv failed");
    840 			break;
    841 		} else {
    842 			buf += ret;
    843 			bytes_left -= ret;
    844 		}
    845 	}
    846 
    847 	return len - bytes_left;
    848 }
    849 
    850 static int net_recv_data(int fd, void *buf, unsigned int len)
    851 {
    852 	return __net_recv_data(fd, buf, len);
    853 }
    854 
    855 /*
    856  * Returns number of bytes written
    857  */
    858 static int net_send_data(int fd, void *buf, unsigned int buf_len)
    859 {
    860 	int ret;
    861 	unsigned int bytes_left = buf_len;
    862 
    863 	while (bytes_left) {
    864 		ret = send(fd, buf, bytes_left, 0);
    865 		if (ret < 0) {
    866 			perror("send");
    867 			break;
    868 		}
    869 
    870 		buf += ret;
    871 		bytes_left -= ret;
    872 	}
    873 
    874 	return buf_len - bytes_left;
    875 }
    876 
    877 static int net_send_header(int fd, int cpu, char *buts_name, int len)
    878 {
    879 	struct blktrace_net_hdr hdr;
    880 
	memset(&hdr, 0, sizeof(hdr));

	hdr.magic = BLK_IO_TRACE_MAGIC;
	strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name) - 1);
    887 	hdr.cpu = cpu;
    888 	hdr.max_cpus = max_cpus;
    889 	hdr.len = len;
    890 	hdr.cl_id = getpid();
    891 	hdr.buf_size = buf_size;
    892 	hdr.buf_nr = buf_nr;
    893 	hdr.page_size = pagesize;
    894 
    895 	return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
    896 }
    897 
    898 static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
    899 {
    900 	struct blktrace_net_hdr ret_hdr;
    901 
    902 	net_send_header(fd, cpu, buts_name, len);
    903 	net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
    904 }
    905 
    906 static void net_send_open(int fd, int cpu, char *buts_name)
    907 {
    908 	net_send_open_close(fd, cpu, buts_name, 0);
    909 }
    910 
    911 static void net_send_close(int fd, char *buts_name, int drops)
    912 {
    913 	/*
    914 	 * Overload CPU w/ number of drops
    915 	 *
    916 	 * XXX: Need to clear/set done around call - done=1 (which
    917 	 * is true here) stops reads from happening... :-(
    918 	 */
    919 	done = 0;
    920 	net_send_open_close(fd, drops, buts_name, 1);
    921 	done = 1;
    922 }
    923 
    924 static void ack_open_close(int fd, char *buts_name)
    925 {
    926 	net_send_header(fd, 0, buts_name, 2);
    927 }
    928 
    929 static void net_send_drops(int fd)
    930 {
    931 	struct list_head *p;
    932 
    933 	__list_for_each(p, &devpaths) {
    934 		struct devpath *dpp = list_entry(p, struct devpath, head);
    935 
    936 		net_send_close(fd, dpp->buts_name, dpp->drops);
    937 	}
    938 }
    939 
    940 /*
    941  * Returns:
    942  *	 0: "EOF"
    943  *	 1: OK
    944  *	-1: Error
    945  */
    946 static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
    947 {
    948 	int bytes_read;
    949 	int fl = fcntl(nc->fd, F_GETFL);
    950 
    951 	fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
    952 	bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
    953 	fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);
    954 
    955 	if (bytes_read == sizeof(*bnh))
    956 		return 1;
    957 	else if (bytes_read == 0)
    958 		return 0;
    959 	else
    960 		return -1;
    961 }
    962 
    963 static int net_setup_addr(void)
    964 {
    965 	struct sockaddr_in *addr = &hostname_addr;
    966 
    967 	memset(addr, 0, sizeof(*addr));
    968 	addr->sin_family = AF_INET;
    969 	addr->sin_port = htons(net_port);
    970 
    971 	if (inet_aton(hostname, &addr->sin_addr) != 1) {
    972 		struct hostent *hent;
    973 retry:
    974 		hent = gethostbyname(hostname);
    975 		if (!hent) {
    976 			if (h_errno == TRY_AGAIN) {
    977 				usleep(100);
    978 				goto retry;
    979 			} else if (h_errno == NO_RECOVERY) {
				fprintf(stderr, "gethostbyname(%s): "
					"non-recoverable error encountered\n",
					hostname);
    983 			} else {
    984 				/*
    985 				 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA
    986 				 */
    987 				fprintf(stderr, "Host %s not found\n",
    988 					hostname);
    989 			}
    990 			return 1;
    991 		}
    992 
    993 		memcpy(&addr->sin_addr, hent->h_addr, 4);
    994 		memset(hostname, 0, sizeof(hostname));
    995 		strncpy(hostname, hent->h_name, sizeof(hostname));
    996 		hostname[sizeof(hostname) - 1] = '\0';
    997 	}
    998 
    999 	return 0;
   1000 }
   1001 
   1002 static int net_setup_client(void)
   1003 {
   1004 	int fd;
   1005 	struct sockaddr_in *addr = &hostname_addr;
   1006 
   1007 	fd = my_socket(AF_INET, SOCK_STREAM, 0);
   1008 	if (fd < 0) {
   1009 		perror("client: socket");
   1010 		return -1;
   1011 	}
   1012 
   1013 	if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
   1014 		if (errno == ECONNREFUSED)
   1015 			fprintf(stderr,
   1016 				"\nclient: Connection to %s refused, "
   1017 				"perhaps the server is not started?\n\n",
   1018 				hostname);
   1019 		else
   1020 			perror("client: connect");
   1021 
   1022 		close(fd);
   1023 		return -1;
   1024 	}
   1025 
   1026 	return fd;
   1027 }
   1028 
   1029 static int open_client_connections(void)
   1030 {
   1031 	int cpu;
   1032 	size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
   1033 
	cl_fds = calloc(max_cpus, sizeof(*cl_fds));
   1035 	for (cpu = 0; cpu < max_cpus; cpu++) {
   1036 		if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
   1037 			continue;
   1038 		cl_fds[cpu] = net_setup_client();
   1039 		if (cl_fds[cpu] < 0)
   1040 			goto err;
   1041 	}
   1042 	return 0;
   1043 
err:
	while (cpu--)
		if (CPU_ISSET_S(cpu, alloc_size, online_cpus) &&
		    cl_fds[cpu] >= 0)
			close(cl_fds[cpu]);
   1047 	free(cl_fds);
   1048 	return 1;
   1049 }
   1050 
   1051 static void close_client_connections(void)
   1052 {
   1053 	if (cl_fds) {
   1054 		int cpu, *fdp;
   1055 		size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
   1056 
   1057 		for (cpu = 0, fdp = cl_fds; cpu < max_cpus; cpu++, fdp++) {
   1058 			if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
   1059 				continue;
   1060 			if (*fdp >= 0) {
   1061 				net_send_drops(*fdp);
   1062 				net_close_connection(fdp);
   1063 			}
   1064 		}
   1065 		free(cl_fds);
   1066 	}
   1067 }
   1068 
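/*
 * Issue BLKTRACESETUP for every device on the devpaths list. Returns the
 * number of devices that failed (0 on complete success).
 */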
   1069 static int setup_buts(void)
   1070 {
   1071 	struct list_head *p;
   1072 	int ret = 0;
   1073 
   1074 	__list_for_each(p, &devpaths) {
   1075 		struct blk_user_trace_setup buts;
   1076 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1077 
   1078 		memset(&buts, 0, sizeof(buts));
   1079 		buts.buf_size = buf_size;
   1080 		buts.buf_nr = buf_nr;
   1081 		buts.act_mask = act_mask;
   1082 
		if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) {
			dpp->ncpus = max_cpus;
			dpp->buts_name = strdup(buts.name);
			if (dpp->stats)
				free(dpp->stats);
			/* calloc() zeroes the per-CPU stats for us */
			dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
   1090 		} else {
   1091 			fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
   1092 				dpp->path, errno, strerror(errno));
   1093 			ret++;
   1094 		}
   1095 	}
   1096 
   1097 	return ret;
   1098 }
   1099 
   1100 static void start_buts(void)
   1101 {
   1102 	struct list_head *p;
   1103 
   1104 	__list_for_each(p, &devpaths) {
   1105 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1106 
   1107 		if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
   1108 			fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
   1109 				dpp->path, errno, strerror(errno));
   1110 		}
   1111 	}
   1112 }
   1113 
   1114 static int get_drops(struct devpath *dpp)
   1115 {
   1116 	int fd, drops = 0;
   1117 	char fn[MAXPATHLEN + 64], tmp[256];
   1118 
   1119 	snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
   1120 		 dpp->buts_name);
   1121 
	fd = my_open(fn, O_RDONLY);
	if (fd < 0) {
		/*
		 * This may be ok: the kernel may not support
		 * dropped counts.
		 */
		if (errno != ENOENT)
			fprintf(stderr, "Could not open %s: %d/%s\n",
				fn, errno, strerror(errno));
		return 0;
	} else {
		/* NUL terminate before handing the buffer to atoi() */
		int len = read(fd, tmp, sizeof(tmp) - 1);

		if (len < 0)
			fprintf(stderr, "Could not read %s: %d/%s\n",
				fn, errno, strerror(errno));
		else {
			tmp[len] = '\0';
			drops = atoi(tmp);
		}
	}
	close(fd);
   1138 
   1139 	return drops;
   1140 }
   1141 
   1142 static void get_all_drops(void)
   1143 {
   1144 	struct list_head *p;
   1145 
   1146 	__list_for_each(p, &devpaths) {
   1147 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1148 
   1149 		dpp->drops = get_drops(dpp);
   1150 	}
   1151 }
   1152 
   1153 static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
   1154 {
   1155 	struct trace_buf *tbp;
   1156 
   1157 	tbp = malloc(sizeof(*tbp) + bufsize);
   1158 	INIT_LIST_HEAD(&tbp->head);
   1159 	tbp->len = 0;
   1160 	tbp->buf = (void *)(tbp + 1);
   1161 	tbp->cpu = cpu;
   1162 	tbp->dpp = NULL;	/* Will be set when tbp is added */
   1163 
   1164 	return tbp;
   1165 }
   1166 
   1167 static void free_tracer_heads(struct devpath *dpp)
   1168 {
   1169 	int cpu;
   1170 	struct tracer_devpath_head *hd;
   1171 
   1172 	for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
   1173 		if (hd->prev)
   1174 			free(hd->prev);
   1175 
   1176 		pthread_mutex_destroy(&hd->mutex);
   1177 	}
   1178 	free(dpp->heads);
   1179 }
   1180 
   1181 static int setup_tracer_devpaths(void)
   1182 {
   1183 	struct list_head *p;
   1184 
   1185 	if (net_client_use_send())
   1186 		if (open_client_connections())
   1187 			return 1;
   1188 
   1189 	__list_for_each(p, &devpaths) {
   1190 		int cpu;
   1191 		struct tracer_devpath_head *hd;
   1192 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1193 
   1194 		dpp->heads = calloc(max_cpus, sizeof(struct tracer_devpath_head));
   1195 		for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
   1196 			INIT_LIST_HEAD(&hd->head);
   1197 			pthread_mutex_init(&hd->mutex, NULL);
   1198 			hd->prev = NULL;
   1199 		}
   1200 	}
   1201 
   1202 	return 0;
   1203 }
   1204 
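/*
 * Hand a filled trace_buf off to the per-device, per-CPU list and give
 * the caller a freshly allocated buffer for the next read.
 */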
   1205 static inline void add_trace_buf(struct devpath *dpp, int cpu,
   1206 						struct trace_buf **tbpp)
   1207 {
   1208 	struct trace_buf *tbp = *tbpp;
   1209 	struct tracer_devpath_head *hd = &dpp->heads[cpu];
   1210 
   1211 	tbp->dpp = dpp;
   1212 
   1213 	pthread_mutex_lock(&hd->mutex);
   1214 	list_add_tail(&tbp->head, &hd->head);
   1215 	pthread_mutex_unlock(&hd->mutex);
   1216 
   1217 	*tbpp = alloc_trace_buf(cpu, buf_size);
   1218 }
   1219 
   1220 static inline void incr_entries(int entries_handled)
   1221 {
   1222 	pthread_mutex_lock(&dp_mutex);
   1223 	if (dp_entries == 0)
   1224 		pthread_cond_signal(&dp_cond);
   1225 	dp_entries += entries_handled;
   1226 	pthread_mutex_unlock(&dp_mutex);
   1227 }
   1228 
   1229 static void decr_entries(int handled)
   1230 {
   1231 	pthread_mutex_lock(&dp_mutex);
   1232 	dp_entries -= handled;
   1233 	pthread_mutex_unlock(&dp_mutex);
   1234 }
   1235 
   1236 static int wait_empty_entries(void)
   1237 {
   1238 	pthread_mutex_lock(&dp_mutex);
   1239 	while (!done && dp_entries == 0)
   1240 		t_pthread_cond_wait(&dp_cond, &dp_mutex);
   1241 	pthread_mutex_unlock(&dp_mutex);
   1242 
   1243 	return !done;
   1244 }
   1245 
   1246 static int add_devpath(char *path)
   1247 {
   1248 	int fd;
   1249 	struct devpath *dpp;
   1250 	struct list_head *p;
   1251 
   1252 	/*
   1253 	 * Verify device is not duplicated
   1254 	 */
	__list_for_each(p, &devpaths) {
		struct devpath *tmp = list_entry(p, struct devpath, head);

		if (!strcmp(tmp->path, path))
			return 0;
	}
   1260 	/*
   1261 	 * Verify device is valid before going too far
   1262 	 */
   1263 	fd = my_open(path, O_RDONLY | O_NONBLOCK);
   1264 	if (fd < 0) {
   1265 		fprintf(stderr, "Invalid path %s specified: %d/%s\n",
   1266 			path, errno, strerror(errno));
   1267 		return 1;
   1268 	}
   1269 
   1270 	dpp = malloc(sizeof(*dpp));
   1271 	memset(dpp, 0, sizeof(*dpp));
   1272 	dpp->path = strdup(path);
   1273 	dpp->fd = fd;
   1274 	ndevs++;
   1275 	list_add_tail(&dpp->head, &devpaths);
   1276 
   1277 	return 0;
   1278 }
   1279 
   1280 static void rel_devpaths(void)
   1281 {
   1282 	struct list_head *p, *q;
   1283 
   1284 	list_for_each_safe(p, q, &devpaths) {
   1285 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1286 
   1287 		list_del(&dpp->head);
   1288 		__stop_trace(dpp->fd);
   1289 		close(dpp->fd);
   1290 
   1291 		if (dpp->heads)
   1292 			free_tracer_heads(dpp);
   1293 
   1294 		dpp_free(dpp);
   1295 		ndevs--;
   1296 	}
   1297 }
   1298 
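/*
 * Push one trace_buf to the server over this CPU's connection: first a
 * blktrace_net_hdr describing the payload, then the payload itself.
 * Returns non-zero if the connection should be torn down.
 */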
   1299 static int flush_subbuf_net(struct trace_buf *tbp)
   1300 {
   1301 	int fd = cl_fds[tbp->cpu];
   1302 	struct devpath *dpp = tbp->dpp;
   1303 
   1304 	if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
   1305 		return 1;
   1306 	else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
   1307 		return 1;
   1308 
   1309 	return 0;
   1310 }
   1311 
   1312 static int
   1313 handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
   1314 		struct list_head *list)
   1315 {
   1316 	struct trace_buf *tbp;
   1317 	struct list_head *p, *q;
   1318 	int entries_handled = 0;
   1319 
   1320 	list_for_each_safe(p, q, list) {
   1321 		tbp = list_entry(p, struct trace_buf, head);
   1322 
   1323 		list_del(&tbp->head);
   1324 		entries_handled++;
   1325 
   1326 		if (cl_fds[tbp->cpu] >= 0) {
   1327 			if (flush_subbuf_net(tbp)) {
   1328 				close(cl_fds[tbp->cpu]);
   1329 				cl_fds[tbp->cpu] = -1;
   1330 			}
   1331 		}
   1332 
   1333 		free(tbp);
   1334 	}
   1335 
   1336 	return entries_handled;
   1337 }
   1338 
   1339 /*
   1340  * Tack 'tbp's buf onto the tail of 'prev's buf
   1341  */
   1342 static struct trace_buf *tb_combine(struct trace_buf *prev,
   1343 				    struct trace_buf *tbp)
   1344 {
   1345 	unsigned long tot_len;
   1346 
   1347 	tot_len = prev->len + tbp->len;
   1348 	if (tot_len > buf_size) {
		/*
		 * prev->head isn't linked into any list (it was
		 * taken off when it became 'prev'), so we can
		 * realloc the whole structure; the other fields
		 * are "static".
		 */
   1356 		prev = realloc(prev, sizeof(*prev) + tot_len);
   1357 		prev->buf = (void *)(prev + 1);
   1358 	}
   1359 
   1360 	memcpy(prev->buf + prev->len, tbp->buf, tbp->len);
   1361 	prev->len = tot_len;
   1362 
   1363 	free(tbp);
   1364 	return prev;
   1365 }
   1366 
   1367 static int handle_list_file(struct tracer_devpath_head *hd,
   1368 			    struct list_head *list)
   1369 {
   1370 	int off, t_len, nevents;
   1371 	struct blk_io_trace *t;
   1372 	struct list_head *p, *q;
   1373 	int entries_handled = 0;
   1374 	struct trace_buf *tbp, *prev;
   1375 
   1376 	prev = hd->prev;
   1377 	list_for_each_safe(p, q, list) {
   1378 		tbp = list_entry(p, struct trace_buf, head);
   1379 		list_del(&tbp->head);
   1380 		entries_handled++;
   1381 
   1382 		/*
   1383 		 * If there was some leftover before, tack this new
   1384 		 * entry onto the tail of the previous one.
   1385 		 */
   1386 		if (prev)
   1387 			tbp = tb_combine(prev, tbp);
   1388 
   1389 		/*
   1390 		 * See how many whole traces there are - send them
   1391 		 * all out in one go.
   1392 		 */
   1393 		off = 0;
   1394 		nevents = 0;
   1395 		while (off + (int)sizeof(*t) <= tbp->len) {
   1396 			t = (struct blk_io_trace *)(tbp->buf + off);
   1397 			t_len = sizeof(*t) + t->pdu_len;
   1398 			if (off + t_len > tbp->len)
   1399 				break;
   1400 
   1401 			off += t_len;
   1402 			nevents++;
   1403 		}
   1404 		if (nevents)
   1405 			pdc_nev_update(tbp->dpp, tbp->cpu, nevents);
   1406 
   1407 		/*
   1408 		 * Write any full set of traces, any remaining data is kept
   1409 		 * for the next pass.
   1410 		 */
   1411 		if (off) {
   1412 			if (write_data(tbp->buf, off) || off == tbp->len) {
   1413 				free(tbp);
   1414 				prev = NULL;
   1415 			}
   1416 			else {
   1417 				/*
   1418 				 * Move valid data to beginning of buffer
   1419 				 */
   1420 				tbp->len -= off;
   1421 				memmove(tbp->buf, tbp->buf + off, tbp->len);
   1422 				prev = tbp;
   1423 			}
   1424 		} else
   1425 			prev = tbp;
   1426 	}
   1427 	hd->prev = prev;
   1428 
   1429 	return entries_handled;
   1430 }
   1431 
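/*
 * Sweep every device's per-CPU lists, splice queued trace_bufs off under
 * the mutex, and process them outside the lock via handle_list().
 */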
   1432 static void __process_trace_bufs(void)
   1433 {
   1434 	int cpu;
   1435 	struct list_head *p;
   1436 	struct list_head list;
   1437 	int handled = 0;
   1438 
   1439 	__list_for_each(p, &devpaths) {
   1440 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1441 		struct tracer_devpath_head *hd = dpp->heads;
   1442 
   1443 		for (cpu = 0; cpu < max_cpus; cpu++, hd++) {
   1444 			pthread_mutex_lock(&hd->mutex);
   1445 			if (list_empty(&hd->head)) {
   1446 				pthread_mutex_unlock(&hd->mutex);
   1447 				continue;
   1448 			}
   1449 
   1450 			list_replace_init(&hd->head, &list);
   1451 			pthread_mutex_unlock(&hd->mutex);
   1452 
   1453 			handled += handle_list(hd, &list);
   1454 		}
   1455 	}
   1456 
   1457 	if (handled)
   1458 		decr_entries(handled);
   1459 }
   1460 
   1461 static void process_trace_bufs(void)
   1462 {
   1463 	while (wait_empty_entries())
   1464 		__process_trace_bufs();
   1465 }
   1466 
   1467 static void clean_trace_bufs(void)
   1468 {
   1469 	/*
   1470 	 * No mutex needed here: we're only reading from the lists,
   1471 	 * tracers are done
   1472 	 */
   1473 	while (dp_entries)
   1474 		__process_trace_bufs();
   1475 }
   1476 
   1477 static inline void read_err(int cpu, char *ifn)
   1478 {
   1479 	if (errno != EAGAIN)
   1480 		fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
   1481 			cpu, ifn, errno, strerror(errno));
   1482 }
   1483 
   1484 static int net_sendfile(struct io_info *iop)
   1485 {
   1486 	int ret;
   1487 
   1488 	ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
   1489 	if (ret < 0) {
   1490 		perror("sendfile");
   1491 		return 1;
   1492 	} else if (ret < (int)iop->ready) {
   1493 		fprintf(stderr, "short sendfile send (%d of %d)\n",
   1494 			ret, iop->ready);
   1495 		return 1;
   1496 	}
   1497 
   1498 	return 0;
   1499 }
   1500 
   1501 static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
   1502 {
   1503 	struct devpath *dpp = iop->dpp;
   1504 
   1505 	if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
   1506 		return 1;
   1507 	return net_sendfile(iop);
   1508 }
   1509 
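/*
 * Build the per-CPU output file name:
 *
 *	[<output_dir>/|./][<subdir>/]<output_name|buts_name>.blktrace.<cpu>
 *
 * creating the destination directory if need be.
 */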
   1510 static int fill_ofname(char *dst, int dstlen, char *subdir, char *buts_name,
   1511 		       int cpu)
   1512 {
   1513 	int len;
   1514 	struct stat sb;
   1515 
   1516 	if (output_dir)
   1517 		len = snprintf(dst, dstlen, "%s/", output_dir);
   1518 	else
   1519 		len = snprintf(dst, dstlen, "./");
   1520 
   1521 	if (subdir)
   1522 		len += snprintf(dst + len, dstlen - len, "%s", subdir);
   1523 
   1524 	if (stat(dst, &sb) < 0) {
   1525 		if (errno != ENOENT) {
   1526 			fprintf(stderr,
   1527 				"Destination dir %s stat failed: %d/%s\n",
   1528 				dst, errno, strerror(errno));
   1529 			return 1;
   1530 		}
   1531 		/*
   1532 		 * There is no synchronization between multiple threads
   1533 		 * trying to create the directory at once.  It's harmless
   1534 		 * to let them try, so just detect the problem and move on.
   1535 		 */
   1536 		if (mkdir(dst, 0755) < 0 && errno != EEXIST) {
   1537 			fprintf(stderr,
   1538 				"Destination dir %s can't be made: %d/%s\n",
   1539 				dst, errno, strerror(errno));
   1540 			return 1;
   1541 		}
   1542 	}
   1543 
   1544 	if (output_name)
   1545 		snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
   1546 			 output_name, cpu);
   1547 	else
   1548 		snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
   1549 			 buts_name, cpu);
   1550 
   1551 	return 0;
   1552 }
   1553 
   1554 static int set_vbuf(struct io_info *iop, int mode, size_t size)
   1555 {
   1556 	iop->obuf = malloc(size);
	if (setvbuf(iop->ofp, iop->obuf, mode, size)) {
   1558 		fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
   1559 			iop->dpp->path, (int)size, errno,
   1560 			strerror(errno));
   1561 		free(iop->obuf);
   1562 		return 1;
   1563 	}
   1564 
   1565 	return 0;
   1566 }
   1567 
   1568 static int iop_open(struct io_info *iop, int cpu)
   1569 {
   1570 	char hostdir[MAXPATHLEN + 64];
   1571 
   1572 	iop->ofd = -1;
   1573 	if (net_mode == Net_server) {
   1574 		struct cl_conn *nc = iop->nc;
   1575 		int len;
   1576 
   1577 		len = snprintf(hostdir, sizeof(hostdir), "%s-",
   1578 			       nc->ch->hostname);
   1579 		len += strftime(hostdir + len, sizeof(hostdir) - len, "%F-%T/",
   1580 				gmtime(&iop->dpp->cl_connect_time));
   1581 	} else {
   1582 		hostdir[0] = 0;
   1583 	}
   1584 
   1585 	if (fill_ofname(iop->ofn, sizeof(iop->ofn), hostdir,
   1586 			iop->dpp->buts_name, cpu))
   1587 		return 1;
   1588 
   1589 	iop->ofp = my_fopen(iop->ofn, "w+");
   1590 	if (iop->ofp == NULL) {
   1591 		fprintf(stderr, "Open output file %s failed: %d/%s\n",
   1592 			iop->ofn, errno, strerror(errno));
   1593 		return 1;
   1594 	}
   1595 
   1596 	if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
   1597 		fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
   1598 			iop->ofn, errno, strerror(errno));
   1599 		fclose(iop->ofp);
   1600 		return 1;
   1601 	}
   1602 
   1603 	iop->ofd = fileno(iop->ofp);
   1604 	return 0;
   1605 }
   1606 
   1607 static void close_iop(struct io_info *iop)
   1608 {
   1609 	struct mmap_info *mip = &iop->mmap_info;
   1610 
   1611 	if (mip->fs_buf)
   1612 		munmap(mip->fs_buf, mip->fs_buf_len);
   1613 
   1614 	if (!piped_output) {
   1615 		if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
   1616 			fprintf(stderr,
   1617 				"Ignoring err: ftruncate(%s): %d/%s\n",
   1618 				iop->ofn, errno, strerror(errno));
   1619 		}
   1620 	}
   1621 
   1622 	if (iop->ofp)
   1623 		fclose(iop->ofp);
   1624 	if (iop->obuf)
   1625 		free(iop->obuf);
   1626 }
   1627 
   1628 static void close_ios(struct tracer *tp)
   1629 {
   1630 	while (tp->nios > 0) {
   1631 		struct io_info *iop = &tp->ios[--tp->nios];
   1632 
   1633 		iop->dpp->drops = get_drops(iop->dpp);
   1634 		if (iop->ifd >= 0)
   1635 			close(iop->ifd);
   1636 
   1637 		if (iop->ofp)
   1638 			close_iop(iop);
   1639 		else if (iop->ofd >= 0) {
   1640 			struct devpath *dpp = iop->dpp;
   1641 
   1642 			net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
   1643 			net_close_connection(&iop->ofd);
   1644 		}
   1645 	}
   1646 
   1647 	free(tp->ios);
   1648 	free(tp->pfds);
   1649 }
   1650 
   1651 static int open_ios(struct tracer *tp)
   1652 {
   1653 	struct pollfd *pfd;
   1654 	struct io_info *iop;
   1655 	struct list_head *p;
   1656 
	tp->ios = calloc(ndevs, sizeof(struct io_info));
	tp->pfds = calloc(ndevs, sizeof(struct pollfd));
   1662 
   1663 	tp->nios = 0;
   1664 	iop = tp->ios;
   1665 	pfd = tp->pfds;
   1666 	__list_for_each(p, &devpaths) {
   1667 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1668 
   1669 		iop->dpp = dpp;
   1670 		iop->ofd = -1;
   1671 		snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
   1672 			debugfs_path, dpp->buts_name, tp->cpu);
   1673 
   1674 		iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
   1675 		if (iop->ifd < 0) {
   1676 			fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
   1677 				tp->cpu, iop->ifn, errno, strerror(errno));
   1678 			return 1;
   1679 		}
   1680 
   1681 		init_mmap_info(&iop->mmap_info);
   1682 
   1683 		pfd->fd = iop->ifd;
   1684 		pfd->events = POLLIN;
   1685 
   1686 		if (piped_output)
   1687 			;
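			/* nothing to open here: piped data is queued on
			 * the per-device lists for the main thread */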
   1688 		else if (net_client_use_sendfile()) {
   1689 			iop->ofd = net_setup_client();
   1690 			if (iop->ofd < 0)
   1691 				goto err;
   1692 			net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
   1693 		} else if (net_mode == Net_none) {
   1694 			if (iop_open(iop, tp->cpu))
   1695 				goto err;
   1696 		} else {
   1697 			/*
   1698 			 * This ensures that the server knows about all
   1699 			 * connections & devices before _any_ closes
   1700 			 */
   1701 			net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
   1702 		}
   1703 
   1704 		pfd++;
   1705 		iop++;
   1706 		tp->nios++;
   1707 	}
   1708 
   1709 	return 0;
   1710 
   1711 err:
   1712 	close(iop->ifd);	/* tp->nios _not_ bumped */
   1713 	close_ios(tp);
   1714 	return 1;
   1715 }
   1716 
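/*
 * One of the three handle_pfds() implementations (the function pointer
 * declared near the top of this file): drain readable relay files
 * straight into the mmap()ed output file. Returns the number of
 * descriptors that yielded data.
 */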
   1717 static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
   1718 {
   1719 	struct mmap_info *mip;
   1720 	int i, ret, nentries = 0;
   1721 	struct pollfd *pfd = tp->pfds;
   1722 	struct io_info *iop = tp->ios;
   1723 
   1724 	for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
   1725 		if (pfd->revents & POLLIN || force_read) {
   1726 			mip = &iop->mmap_info;
   1727 
   1728 			ret = setup_mmap(iop->ofd, buf_size, mip, tp);
			if (ret) {
   1730 				pfd->events = 0;
   1731 				break;
   1732 			}
   1733 
   1734 			ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
   1735 				   buf_size);
   1736 			if (ret > 0) {
   1737 				pdc_dr_update(iop->dpp, tp->cpu, ret);
   1738 				mip->fs_size += ret;
   1739 				mip->fs_off += ret;
   1740 				nentries++;
   1741 			} else if (ret == 0) {
   1742 				/*
   1743 				 * Short reads after we're done stop us
   1744 				 * from trying reads.
   1745 				 */
   1746 				if (tp->is_done)
   1747 					clear_events(pfd);
   1748 			} else {
   1749 				read_err(tp->cpu, iop->ifn);
   1750 				if (errno != EAGAIN || tp->is_done)
   1751 					clear_events(pfd);
   1752 			}
   1753 			nevs--;
   1754 		}
   1755 	}
   1756 
   1757 	return nentries;
   1758 }
   1759 
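/*
 * handle_pfds() variant for the network client in sendfile() mode:
 * fstat() the relay file to see how much new data has arrived and ship
 * it to the server with sendfile(2).
 */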
   1760 static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
   1761 {
   1762 	struct stat sb;
   1763 	int i, nentries = 0;
   1764 	struct pollfd *pfd = tp->pfds;
   1765 	struct io_info *iop = tp->ios;
   1766 
   1767 	for (i = 0; i < ndevs; i++, pfd++, iop++) {
   1768 		if (pfd->revents & POLLIN || force_read) {
   1769 			if (fstat(iop->ifd, &sb) < 0) {
   1770 				perror(iop->ifn);
   1771 				pfd->events = 0;
   1772 			} else if (sb.st_size > (off_t)iop->data_queued) {
   1773 				iop->ready = sb.st_size - iop->data_queued;
   1774 				iop->data_queued = sb.st_size;
   1775 
   1776 				if (!net_sendfile_data(tp, iop)) {
   1777 					pdc_dr_update(iop->dpp, tp->cpu,
   1778 						      iop->ready);
   1779 					nentries++;
   1780 				} else
   1781 					clear_events(pfd);
   1782 			}
   1783 			if (--nevs == 0)
   1784 				break;
   1785 		}
   1786 	}
   1787 
   1788 	if (nentries)
   1789 		incr_entries(nentries);
   1790 
   1791 	return nentries;
   1792 }
   1793 
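/*
 * handle_pfds() variant used when traces are queued for the main thread
 * (piped output, or network client in send() mode): read into trace_bufs
 * and hand them off via add_trace_buf()/incr_entries().
 */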
   1794 static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
   1795 {
   1796 	int i, nentries = 0;
   1797 	struct trace_buf *tbp;
   1798 	struct pollfd *pfd = tp->pfds;
   1799 	struct io_info *iop = tp->ios;
   1800 
   1801 	tbp = alloc_trace_buf(tp->cpu, buf_size);
   1802 	for (i = 0; i < ndevs; i++, pfd++, iop++) {
   1803 		if (pfd->revents & POLLIN || force_read) {
   1804 			tbp->len = read(iop->ifd, tbp->buf, buf_size);
   1805 			if (tbp->len > 0) {
   1806 				pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
   1807 				add_trace_buf(iop->dpp, tp->cpu, &tbp);
   1808 				nentries++;
   1809 			} else if (tbp->len == 0) {
   1810 				/*
   1811 				 * Short reads after we're done stop us
   1812 				 * from trying reads.
   1813 				 */
   1814 				if (tp->is_done)
   1815 					clear_events(pfd);
   1816 			} else {
   1817 				read_err(tp->cpu, iop->ifn);
   1818 				if (errno != EAGAIN || tp->is_done)
   1819 					clear_events(pfd);
   1820 			}
   1821 			if (!piped_output && --nevs == 0)
   1822 				break;
   1823 		}
   1824 	}
   1825 	free(tbp);
   1826 
   1827 	if (nentries)
   1828 		incr_entries(nentries);
   1829 
   1830 	return nentries;
   1831 }
   1832 
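/*
 * Per-CPU tracer thread body: pin to the CPU, open the relay and output
 * descriptors, then poll and drain until told to stop, finishing with a
 * final drain pass once tracing has stopped.
 */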
   1833 static void *thread_main(void *arg)
   1834 {
   1835 	int ret, ndone, to_val;
   1836 	struct tracer *tp = arg;
   1837 
   1838 	ret = lock_on_cpu(tp->cpu);
   1839 	if (ret)
   1840 		goto err;
   1841 
   1842 	ret = open_ios(tp);
   1843 	if (ret)
   1844 		goto err;
   1845 
   1846 	if (piped_output)
   1847 		to_val = 50;		/* Frequent partial handles */
   1848 	else
   1849 		to_val = 500;		/* 1/2 second intervals */
   1850 
   1852 	tracer_signal_ready(tp, Th_running, 0);
   1853 	tracer_wait_unblock(tp);
   1854 
   1855 	while (!tp->is_done) {
   1856 		ndone = poll(tp->pfds, ndevs, to_val);
   1857 		if (ndone || piped_output)
   1858 			(void)handle_pfds(tp, ndone, piped_output);
   1859 		else if (ndone < 0 && errno != EINTR)
   1860 			fprintf(stderr, "Thread %d poll failed: %d/%s\n",
   1861 				tp->cpu, errno, strerror(errno));
   1862 	}
   1863 
   1864 	/*
   1865 	 * Trace is stopped, pull data until we get a short read
   1866 	 */
   1867 	while (handle_pfds(tp, ndevs, 1) > 0)
   1868 		;
   1869 
   1870 	close_ios(tp);
   1871 	tracer_signal_ready(tp, Th_leaving, 0);
   1872 	return NULL;
   1873 
   1874 err:
   1875 	tracer_signal_ready(tp, Th_error, ret);
   1876 	return NULL;
   1877 }
   1878 
   1879 static int start_tracer(int cpu)
   1880 {
   1881 	struct tracer *tp;
   1882 
   1883 	tp = malloc(sizeof(*tp));
   1884 	memset(tp, 0, sizeof(*tp));
   1885 
   1886 	INIT_LIST_HEAD(&tp->head);
   1887 	tp->status = 0;
   1888 	tp->cpu = cpu;
   1889 
   1890 	if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
   1891 		fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
   1892 			cpu, errno, strerror(errno));
   1893 		free(tp);
   1894 		return 1;
   1895 	}
   1896 
   1897 	list_add_tail(&tp->head, &tracers);
   1898 	return 0;
   1899 }
   1900 
   1901 static int create_output_files(int cpu)
   1902 {
   1903 	char fname[MAXPATHLEN + 64];
   1904 	struct list_head *p;
   1905 	FILE *f;
   1906 
   1907 	__list_for_each(p, &devpaths) {
   1908 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1909 
   1910 		if (fill_ofname(fname, sizeof(fname), NULL, dpp->buts_name,
   1911 				cpu))
   1912 			return 1;
   1913 		f = my_fopen(fname, "w+");
   1914 		if (!f)
   1915 			return 1;
   1916 		fclose(f);
   1917 	}
   1918 	return 0;
   1919 }
   1920 
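         /*
          * Launch one tracer thread per online CPU, then block until all
          * of them have signalled ready (or failed).
          */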
   1921 static void start_tracers(void)
   1922 {
   1923 	int cpu, started = 0;
   1924 	struct list_head *p;
   1925 	size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
   1926 
   1927 	for (cpu = 0; cpu < max_cpus; cpu++) {
   1928 		if (!CPU_ISSET_S(cpu, alloc_size, online_cpus)) {
   1929 			/*
   1930 			 * Create fake empty output files so that other tools
   1931 			 * like blkparse don't have to bother with sparse CPU
   1932 			 * number space.
   1933 			 */
   1934 			if (create_output_files(cpu))
   1935 				break;
   1936 			continue;
   1937 		}
   1938 		if (start_tracer(cpu))
   1939 			break;
   1940 		started++;
   1941 	}
   1942 
   1943 	wait_tracers_ready(started);
   1944 
   1945 	__list_for_each(p, &tracers) {
   1946 		struct tracer *tp = list_entry(p, struct tracer, head);
   1947 		if (tp->status)
   1948 			fprintf(stderr,
   1949 				"FAILED to start thread on CPU %d: %d/%s\n",
   1950 				tp->cpu, tp->status, strerror(tp->status));
   1951 	}
   1952 }
   1953 
   1954 static void stop_tracers(void)
   1955 {
   1956 	struct list_head *p;
   1957 
   1958 	/*
   1959 	 * Stop the tracing - makes the tracer threads clean up quicker.
   1960 	 */
   1961 	__list_for_each(p, &devpaths) {
   1962 		struct devpath *dpp = list_entry(p, struct devpath, head);
   1963 		(void)ioctl(dpp->fd, BLKTRACESTOP);
   1964 	}
   1965 
   1966 	/*
   1967 	 * Tell each tracer to quit
   1968 	 */
   1969 	__list_for_each(p, &tracers) {
   1970 		struct tracer *tp = list_entry(p, struct tracer, head);
   1971 		tp->is_done = 1;
   1972 	}
   1973 	pthread_cond_broadcast(&mt_cond);
   1974 }
   1975 
   1976 static void del_tracers(void)
   1977 {
   1978 	struct list_head *p, *q;
   1979 
   1980 	list_for_each_safe(p, q, &tracers) {
   1981 		struct tracer *tp = list_entry(p, struct tracer, head);
   1982 
   1983 		list_del(&tp->head);
   1984 		free(tp);
   1985 	}
   1986 }
   1987 
   1988 static void wait_tracers(void)
   1989 {
   1990 	struct list_head *p;
   1991 
   1992 	if (use_tracer_devpaths())
   1993 		process_trace_bufs();
   1994 
   1995 	wait_tracers_leaving();
   1996 
   1997 	__list_for_each(p, &tracers) {
   1998 		int ret;
   1999 		struct tracer *tp = list_entry(p, struct tracer, head);
   2000 
   2001 		ret = pthread_join(tp->thread, NULL);
   2002 		if (ret)
   2003 			fprintf(stderr, "Thread join %d failed %d\n",
   2004 				tp->cpu, ret);
   2005 	}
   2006 
   2007 	if (use_tracer_devpaths())
   2008 		clean_trace_bufs();
   2009 
   2010 	get_all_drops();
   2011 }
   2012 
   2013 static void exit_tracing(void)
   2014 {
   2015 	signal(SIGINT, SIG_IGN);
   2016 	signal(SIGHUP, SIG_IGN);
   2017 	signal(SIGTERM, SIG_IGN);
   2018 	signal(SIGALRM, SIG_IGN);
   2019 
   2020 	stop_tracers();
   2021 	wait_tracers();
   2022 	del_tracers();
   2023 	rel_devpaths();
   2024 }
   2025 
   2026 static void handle_sigint(__attribute__((__unused__)) int sig)
   2027 {
   2028 	done = 1;
   2029 	stop_tracers();
   2030 }
   2031 
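         /*
          * Dump per-CPU and per-device event/data totals. nevents may be
          * an estimate (data_read / sizeof(struct blk_io_trace)) when the
          * exact count was not recorded. KiB figures round up:
          * (data_read + 1023) >> 10 == ceil(data_read / 1024).
          */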
   2032 static void show_stats(struct list_head *devpaths)
   2033 {
   2034 	FILE *ofp;
   2035 	struct list_head *p;
   2036 	unsigned long long nevents, data_read;
   2037 	unsigned long long total_drops = 0;
   2038 	unsigned long long total_events = 0;
   2039 
   2040 	if (piped_output)
   2041 		ofp = my_fopen("/dev/null", "w");
   2042 	else
   2043 		ofp = stdout;
   2044 
   2045 	__list_for_each(p, devpaths) {
   2046 		int cpu;
   2047 		struct pdc_stats *sp;
   2048 		struct devpath *dpp = list_entry(p, struct devpath, head);
   2049 
   2050 		if (net_mode == Net_server)
   2051 			printf("server: end of run for %s:%s\n",
   2052 				dpp->ch->hostname, dpp->buts_name);
   2053 
   2054 		data_read = 0;
   2055 		nevents = 0;
   2056 
   2057 		fprintf(ofp, "=== %s ===\n", dpp->buts_name);
   2058 		for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
   2059 			/*
   2060 			 * Estimate events if not known...
   2061 			 */
   2062 			if (sp->nevents == 0) {
   2063 				sp->nevents = sp->data_read /
   2064 						sizeof(struct blk_io_trace);
   2065 			}
   2066 
   2067 			fprintf(ofp,
   2068 				"  CPU%3d: %20llu events, %8llu KiB data\n",
   2069 				cpu, sp->nevents, (sp->data_read + 1023) >> 10);
   2070 
   2071 			data_read += sp->data_read;
   2072 			nevents += sp->nevents;
   2073 		}
   2074 
   2075 		fprintf(ofp, "  Total:  %20llu events (dropped %llu),"
   2076 			     " %8llu KiB data\n", nevents,
    2077 			     dpp->drops, (data_read + 1023) >> 10);
   2078 
   2079 		total_drops += dpp->drops;
   2080 		total_events += (nevents + dpp->drops);
   2081 	}
   2082 
   2083 	fflush(ofp);
   2084 	if (piped_output)
   2085 		fclose(ofp);
   2086 
   2087 	if (total_drops) {
   2088 		double drops_ratio = 1.0;
   2089 
   2090 		if (total_events)
   2091 			drops_ratio = (double)total_drops/(double)total_events;
   2092 
   2093 		fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
   2094 				"Consider using a larger buffer size (-b) "
   2095 				"and/or more buffers (-n)\n",
   2096 			total_drops, 100.0 * drops_ratio);
   2097 	}
   2098 }
   2099 
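         /*
          * Parse the command line; any trailing non-option arguments are
          * treated as additional device paths. Also selects the pollfd
          * handler (file, piped, or network) used for this run.
          */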
   2100 static int handle_args(int argc, char *argv[])
   2101 {
   2102 	int c, i;
   2103 	struct statfs st;
   2104 	int act_mask_tmp = 0;
   2105 
   2106 	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
   2107 		switch (c) {
   2108 		case 'a':
   2109 			i = find_mask_map(optarg);
   2110 			if (i < 0) {
   2111 				fprintf(stderr, "Invalid action mask %s\n",
   2112 					optarg);
   2113 				return 1;
   2114 			}
   2115 			act_mask_tmp |= i;
   2116 			break;
   2117 
   2118 		case 'A':
   2119 			if ((sscanf(optarg, "%x", &i) != 1) ||
   2120 							!valid_act_opt(i)) {
   2121 				fprintf(stderr,
   2122 					"Invalid set action mask %s/0x%x\n",
   2123 					optarg, i);
   2124 				return 1;
   2125 			}
   2126 			act_mask_tmp = i;
   2127 			break;
   2128 
   2129 		case 'd':
   2130 			if (add_devpath(optarg) != 0)
   2131 				return 1;
   2132 			break;
   2133 
   2134 		case 'I': {
   2135 			char dev_line[256];
   2136 			FILE *ifp = my_fopen(optarg, "r");
   2137 
   2138 			if (!ifp) {
   2139 				fprintf(stderr,
   2140 					"Invalid file for devices %s\n",
   2141 					optarg);
   2142 				return 1;
   2143 			}
   2144 
    2145 			while (fscanf(ifp, "%255s", dev_line) == 1) {
   2146 				if (add_devpath(dev_line) != 0) {
   2147 					fclose(ifp);
   2148 					return 1;
   2149 				}
   2150 			}
   2151 			fclose(ifp);
   2152 			break;
   2153 		}
   2154 
   2155 		case 'r':
   2156 			debugfs_path = optarg;
   2157 			break;
   2158 
   2159 		case 'o':
   2160 			output_name = optarg;
   2161 			break;
   2162 		case 'k':
   2163 			kill_running_trace = 1;
   2164 			break;
   2165 		case 'w':
   2166 			stop_watch = atoi(optarg);
   2167 			if (stop_watch <= 0) {
   2168 				fprintf(stderr,
   2169 					"Invalid stopwatch value (%d secs)\n",
   2170 					stop_watch);
   2171 				return 1;
   2172 			}
   2173 			break;
   2174 		case 'V':
   2175 		case 'v':
   2176 			printf("%s version %s\n", argv[0], blktrace_version);
   2177 			exit(0);
   2178 			/*NOTREACHED*/
   2179 		case 'b':
   2180 			buf_size = strtoul(optarg, NULL, 10);
   2181 			if (buf_size <= 0 || buf_size > 16*1024) {
   2182 				fprintf(stderr, "Invalid buffer size (%lu)\n",
   2183 					buf_size);
   2184 				return 1;
   2185 			}
   2186 			buf_size <<= 10;
   2187 			break;
   2188 		case 'n':
   2189 			buf_nr = strtoul(optarg, NULL, 10);
   2190 			if (buf_nr <= 0) {
   2191 				fprintf(stderr,
   2192 					"Invalid buffer nr (%lu)\n", buf_nr);
   2193 				return 1;
   2194 			}
   2195 			break;
   2196 		case 'D':
   2197 			output_dir = optarg;
   2198 			break;
   2199 		case 'h':
   2200 			net_mode = Net_client;
   2201 			memset(hostname, 0, sizeof(hostname));
   2202 			strncpy(hostname, optarg, sizeof(hostname));
   2203 			hostname[sizeof(hostname) - 1] = '\0';
   2204 			break;
   2205 		case 'l':
   2206 			net_mode = Net_server;
   2207 			break;
   2208 		case 'p':
   2209 			net_port = atoi(optarg);
   2210 			break;
   2211 		case 's':
   2212 			net_use_sendfile = 0;
   2213 			break;
   2214 		default:
   2215 			show_usage(argv[0]);
   2216 			exit(1);
   2217 			/*NOTREACHED*/
   2218 		}
   2219 	}
   2220 
   2221 	while (optind < argc)
   2222 		if (add_devpath(argv[optind++]) != 0)
   2223 			return 1;
   2224 
   2225 	if (net_mode != Net_server && ndevs == 0) {
   2226 		show_usage(argv[0]);
   2227 		return 1;
   2228 	}
   2229 
   2230 	if (statfs(debugfs_path, &st) < 0) {
   2231 		fprintf(stderr, "Invalid debug path %s: %d/%s\n",
   2232 			debugfs_path, errno, strerror(errno));
   2233 		return 1;
   2234 	}
   2235 
   2236 	if (st.f_type != (long)DEBUGFS_TYPE) {
   2237 		fprintf(stderr, "Debugfs is not mounted at %s\n", debugfs_path);
   2238 		return 1;
   2239 	}
   2240 
   2241 	if (act_mask_tmp != 0)
   2242 		act_mask = act_mask_tmp;
   2243 
   2244 	if (net_mode == Net_client && net_setup_addr())
   2245 		return 1;
   2246 
   2247 	/*
   2248 	 * Set up for appropriate PFD handler based upon output name.
   2249 	 */
   2250 	if (net_client_use_sendfile())
   2251 		handle_pfds = handle_pfds_netclient;
   2252 	else if (net_client_use_send())
   2253 		handle_pfds = handle_pfds_entries;
   2254 	else if (output_name && (strcmp(output_name, "-") == 0)) {
   2255 		piped_output = 1;
   2256 		handle_pfds = handle_pfds_entries;
   2257 		pfp = stdout;
   2258 		if (setvbuf(pfp, NULL, _IONBF, 0)) {
   2259 			perror("setvbuf stdout");
   2260 			return 1;
   2261 		}
   2262 	} else
   2263 		handle_pfds = handle_pfds_file;
   2264 	return 0;
   2265 }
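         /*
          * Example command lines exercising the option parsing above
          * (illustrative, not exhaustive; see show_usage() for the
          * authoritative list):
          *
          *   blktrace -d /dev/sda                # per-CPU sda.blktrace.N files
          *   blktrace -d /dev/sda -o - | blkparse -i -    # piped output
          *   blktrace -l                         # run as network server
          *   blktrace -d /dev/sda -h <server>    # ship traces to a server
          */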
   2266 
   2267 static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
   2268 			      int fd)
   2269 {
   2270 	struct cl_conn *nc;
   2271 
   2272 	nc = malloc(sizeof(*nc));
   2273 	memset(nc, 0, sizeof(*nc));
   2274 
   2275 	time(&nc->connect_time);
   2276 	nc->ch = ch;
   2277 	nc->fd = fd;
   2278 	nc->ncpus = -1;
   2279 
   2280 	list_add_tail(&nc->ch_head, &ch->conn_list);
   2281 	ch->connects++;
   2282 
   2283 	list_add_tail(&nc->ns_head, &ns->conn_list);
   2284 	ns->connects++;
   2285 	ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
   2286 }
   2287 
   2288 static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
   2289 			      struct cl_conn *nc)
   2290 {
   2291 	net_close_connection(&nc->fd);
   2292 
   2293 	list_del(&nc->ch_head);
   2294 	ch->connects--;
   2295 
   2296 	list_del(&nc->ns_head);
   2297 	ns->connects--;
   2298 	ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
   2299 
   2300 	free(nc);
   2301 }
   2302 
   2303 static struct cl_host *net_find_client_host(struct net_server_s *ns,
   2304 					    struct in_addr cl_in_addr)
   2305 {
   2306 	struct list_head *p;
   2307 
   2308 	__list_for_each(p, &ns->ch_list) {
   2309 		struct cl_host *ch = list_entry(p, struct cl_host, head);
   2310 
   2311 		if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
   2312 			return ch;
   2313 	}
   2314 
   2315 	return NULL;
   2316 }
   2317 
   2318 static struct cl_host *net_add_client_host(struct net_server_s *ns,
   2319 					   struct sockaddr_in *addr)
   2320 {
   2321 	struct cl_host *ch;
   2322 
   2323 	ch = malloc(sizeof(*ch));
   2324 	memset(ch, 0, sizeof(*ch));
   2325 
   2326 	ch->ns = ns;
   2327 	ch->cl_in_addr = addr->sin_addr;
   2328 	list_add_tail(&ch->head, &ns->ch_list);
   2329 	ns->nchs++;
   2330 
   2331 	ch->hostname = strdup(inet_ntoa(addr->sin_addr));
   2332 	printf("server: connection from %s\n", ch->hostname);
   2333 
   2334 	INIT_LIST_HEAD(&ch->conn_list);
   2335 	INIT_LIST_HEAD(&ch->devpaths);
   2336 
   2337 	return ch;
   2338 }
   2339 
   2340 static void device_done(struct devpath *dpp, int ncpus)
   2341 {
   2342 	int cpu;
   2343 	struct io_info *iop;
   2344 
   2345 	for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
   2346 		close_iop(iop);
   2347 
   2348 	list_del(&dpp->head);
   2349 	dpp_free(dpp);
   2350 }
   2351 
   2352 static void net_ch_remove(struct cl_host *ch, int ncpus)
   2353 {
   2354 	struct list_head *p, *q;
   2355 	struct net_server_s *ns = ch->ns;
   2356 
   2357 	list_for_each_safe(p, q, &ch->devpaths) {
   2358 		struct devpath *dpp = list_entry(p, struct devpath, head);
   2359 		device_done(dpp, ncpus);
   2360 	}
   2361 
   2362 	list_for_each_safe(p, q, &ch->conn_list) {
   2363 		struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);
   2364 
   2365 		ch_rem_connection(ns, ch, nc);
   2366 	}
   2367 
   2368 	list_del(&ch->head);
   2369 	ns->nchs--;
   2370 
   2371 	if (ch->hostname)
   2372 		free(ch->hostname);
   2373 	free(ch);
   2374 }
   2375 
   2376 static void net_add_connection(struct net_server_s *ns)
   2377 {
   2378 	int fd;
   2379 	struct cl_host *ch;
   2380 	socklen_t socklen = sizeof(ns->addr);
   2381 
   2382 	fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
   2383 	if (fd < 0) {
   2384 		/*
   2385 		 * This is OK: we just won't accept this connection,
   2386 		 * nothing fatal.
   2387 		 */
   2388 		perror("accept");
   2389 	} else {
   2390 		ch = net_find_client_host(ns, ns->addr.sin_addr);
   2391 		if (!ch)
   2392 			ch = net_add_client_host(ns, &ns->addr);
   2393 
   2394 		ch_add_connection(ns, ch, fd);
   2395 	}
   2396 }
   2397 
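         /*
          * First data seen for an unknown device on this connection: build
          * a server-side devpath with one io_info (and output file) per
          * client CPU. On any open failure, the unwind loop closes the
          * io_infos opened so far and drops the devpath.
          */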
   2398 static struct devpath *nc_add_dpp(struct cl_conn *nc,
   2399 				  struct blktrace_net_hdr *bnh,
   2400 				  time_t connect_time)
   2401 {
   2402 	int cpu;
   2403 	struct io_info *iop;
   2404 	struct devpath *dpp;
   2405 
   2406 	dpp = malloc(sizeof(*dpp));
   2407 	memset(dpp, 0, sizeof(*dpp));
   2408 
   2409 	dpp->buts_name = strdup(bnh->buts_name);
   2410 	dpp->path = strdup(bnh->buts_name);
   2411 	dpp->fd = -1;
   2412 	dpp->ch = nc->ch;
   2413 	dpp->cl_id = bnh->cl_id;
   2414 	dpp->cl_connect_time = connect_time;
   2415 	dpp->ncpus = nc->ncpus;
   2416 	dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
   2417 	memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
   2418 
   2419 	list_add_tail(&dpp->head, &nc->ch->devpaths);
   2420 	nc->ch->ndevs++;
   2421 
   2422 	dpp->ios = calloc(nc->ncpus, sizeof(*iop));
    2423 	memset(dpp->ios, 0, nc->ncpus * sizeof(*iop));
   2424 
   2425 	for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
   2426 		iop->dpp = dpp;
   2427 		iop->nc = nc;
   2428 		init_mmap_info(&iop->mmap_info);
   2429 
   2430 		if (iop_open(iop, cpu))
   2431 			goto err;
   2432 	}
   2433 
   2434 	return dpp;
   2435 
   2436 err:
   2437 	/*
   2438 	 * Need to unravel what's been done...
   2439 	 */
   2440 	while (cpu >= 0)
   2441 		close_iop(&dpp->ios[cpu--]);
   2442 	dpp_free(dpp);
   2443 
   2444 	return NULL;
   2445 }
   2446 
   2447 static struct devpath *nc_find_dpp(struct cl_conn *nc,
   2448 				   struct blktrace_net_hdr *bnh)
   2449 {
   2450 	struct list_head *p;
   2451 	time_t connect_time = nc->connect_time;
   2452 
   2453 	__list_for_each(p, &nc->ch->devpaths) {
   2454 		struct devpath *dpp = list_entry(p, struct devpath, head);
   2455 
   2456 		if (!strcmp(dpp->buts_name, bnh->buts_name))
   2457 			return dpp;
   2458 
   2459 		if (dpp->cl_id == bnh->cl_id)
   2460 			connect_time = dpp->cl_connect_time;
   2461 	}
   2462 
   2463 	return nc_add_dpp(nc, bnh, connect_time);
   2464 }
   2465 
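         /*
          * Pull bnh->len bytes of trace data off the socket straight into
          * an mmap()ed window of the per-CPU output file, extending the
          * mapping as needed via setup_mmap().
          */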
   2466 static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
   2467 				 struct blktrace_net_hdr *bnh)
   2468 {
   2469 	int ret;
   2470 	struct io_info *iop = &dpp->ios[bnh->cpu];
   2471 	struct mmap_info *mip = &iop->mmap_info;
   2472 
   2473 	if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info, NULL)) {
   2474 		fprintf(stderr, "ncd(%s:%d): mmap failed\n",
   2475 			nc->ch->hostname, nc->fd);
   2476 		exit(1);
   2477 	}
   2478 
   2479 	ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
   2480 	if (ret > 0) {
   2481 		pdc_dr_update(dpp, bnh->cpu, ret);
   2482 		mip->fs_size += ret;
   2483 		mip->fs_off += ret;
   2484 	} else if (ret < 0)
   2485 		exit(1);
   2486 }
   2487 
   2488 /*
   2489  * Returns 1 if we closed a host - invalidates other polling information
   2490  * that may be present.
   2491  */
   2492 static int net_client_data(struct cl_conn *nc)
   2493 {
   2494 	int ret;
   2495 	struct devpath *dpp;
   2496 	struct blktrace_net_hdr bnh;
   2497 
   2498 	ret = net_get_header(nc, &bnh);
   2499 	if (ret == 0)
   2500 		return 0;
   2501 
   2502 	if (ret < 0) {
   2503 		fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
   2504 		exit(1);
   2505 	}
   2506 
   2507 	if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
   2508 		fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
   2509 		exit(1);
   2510 	}
   2511 
   2512 	if (!data_is_native) {
   2513 		bnh.magic = be32_to_cpu(bnh.magic);
   2514 		bnh.cpu = be32_to_cpu(bnh.cpu);
   2515 		bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
   2516 		bnh.len = be32_to_cpu(bnh.len);
   2517 		bnh.cl_id = be32_to_cpu(bnh.cl_id);
   2518 		bnh.buf_size = be32_to_cpu(bnh.buf_size);
   2519 		bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
   2520 		bnh.page_size = be32_to_cpu(bnh.page_size);
   2521 	}
   2522 
   2523 	if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
   2524 		fprintf(stderr, "ncd(%s:%d): bad data magic\n",
   2525 			nc->ch->hostname, nc->fd);
   2526 		exit(1);
   2527 	}
   2528 
   2529 	if (nc->ncpus == -1)
   2530 		nc->ncpus = bnh.max_cpus;
   2531 
   2532 	/*
   2533 	 * len == 0 means the other end is sending us a new connection/dpp
   2534 	 * len == 1 means that the other end signalled end-of-run
   2535 	 */
   2536 	dpp = nc_find_dpp(nc, &bnh);
   2537 	if (bnh.len == 0) {
   2538 		/*
   2539 		 * Just adding in the dpp above is enough
   2540 		 */
   2541 		ack_open_close(nc->fd, dpp->buts_name);
   2542 		nc->ch->cl_opens++;
   2543 	} else if (bnh.len == 1) {
   2544 		/*
   2545 		 * overload cpu count with dropped events
   2546 		 */
   2547 		dpp->drops = bnh.cpu;
   2548 
   2549 		ack_open_close(nc->fd, dpp->buts_name);
   2550 		if (--nc->ch->cl_opens == 0) {
   2551 			show_stats(&nc->ch->devpaths);
   2552 			net_ch_remove(nc->ch, nc->ncpus);
   2553 			return 1;
   2554 		}
   2555 	} else
   2556 		net_client_read_data(nc, dpp, &bnh);
   2557 
   2558 	return 0;
   2559 }
   2560 
   2561 static void handle_client_data(struct net_server_s *ns, int events)
   2562 {
   2563 	struct cl_conn *nc;
   2564 	struct pollfd *pfd;
   2565 	struct list_head *p, *q;
   2566 
   2567 	pfd = &ns->pfds[1];
   2568 	list_for_each_safe(p, q, &ns->conn_list) {
   2569 		if (pfd->revents & POLLIN) {
   2570 			nc = list_entry(p, struct cl_conn, ns_head);
   2571 
   2572 			if (net_client_data(nc) || --events == 0)
   2573 				break;
   2574 		}
   2575 		pfd++;
   2576 	}
   2577 }
   2578 
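         /*
          * Rebuild the pollfd array on every loop iteration: slot 0 is the
          * listen fd, followed by one slot per live client connection.
          */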
   2579 static void net_setup_pfds(struct net_server_s *ns)
   2580 {
   2581 	struct pollfd *pfd;
   2582 	struct list_head *p;
   2583 
   2584 	ns->pfds[0].fd = ns->listen_fd;
   2585 	ns->pfds[0].events = POLLIN;
   2586 
   2587 	pfd = &ns->pfds[1];
   2588 	__list_for_each(p, &ns->conn_list) {
   2589 		struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);
   2590 
   2591 		pfd->fd = nc->fd;
   2592 		pfd->events = POLLIN;
   2593 		pfd++;
   2594 	}
   2595 }
   2596 
   2597 static int net_server_handle_connections(struct net_server_s *ns)
   2598 {
   2599 	int events;
   2600 
   2601 	printf("server: waiting for connections...\n");
   2602 
   2603 	while (!done) {
   2604 		net_setup_pfds(ns);
   2605 		events = poll(ns->pfds, ns->connects + 1, -1);
   2606 		if (events < 0) {
   2607 			if (errno != EINTR) {
   2608 				perror("FATAL: poll error");
   2609 				return 1;
   2610 			}
   2611 		} else if (events > 0) {
   2612 			if (ns->pfds[0].revents & POLLIN) {
   2613 				net_add_connection(ns);
   2614 				events--;
   2615 			}
   2616 
   2617 			if (events)
   2618 				handle_client_data(ns, events);
   2619 		}
   2620 	}
   2621 
   2622 	return 0;
   2623 }
   2624 
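         /*
          * Standard TCP server bring-up: socket/bind/listen on net_port
          * across all interfaces, then hand the listen fd to the poll loop
          * in net_server_handle_connections().
          */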
   2625 static int net_server(void)
   2626 {
   2627 	int fd, opt;
   2628 	int ret = 1;
   2629 	struct net_server_s net_server;
   2630 	struct net_server_s *ns = &net_server;
   2631 
   2632 	memset(ns, 0, sizeof(*ns));
   2633 	INIT_LIST_HEAD(&ns->ch_list);
   2634 	INIT_LIST_HEAD(&ns->conn_list);
   2635 	ns->pfds = malloc(sizeof(struct pollfd));
   2636 
   2637 	fd = my_socket(AF_INET, SOCK_STREAM, 0);
   2638 	if (fd < 0) {
   2639 		perror("server: socket");
   2640 		goto out;
   2641 	}
   2642 
   2643 	opt = 1;
   2644 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
   2645 		perror("setsockopt");
   2646 		goto out;
   2647 	}
   2648 
   2649 	memset(&ns->addr, 0, sizeof(ns->addr));
   2650 	ns->addr.sin_family = AF_INET;
   2651 	ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
   2652 	ns->addr.sin_port = htons(net_port);
   2653 
   2654 	if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
   2655 		perror("bind");
   2656 		goto out;
   2657 	}
   2658 
   2659 	if (listen(fd, 1) < 0) {
   2660 		perror("listen");
   2661 		goto out;
   2662 	}
   2663 
   2664 	/*
   2665 	 * The actual server looping is done here:
   2666 	 */
   2667 	ns->listen_fd = fd;
   2668 	ret = net_server_handle_connections(ns);
   2669 
   2670 	/*
   2671 	 * Clean up and return...
   2672 	 */
   2673 out:
   2674 	free(ns->pfds);
   2675 	return ret;
   2676 }
   2677 
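         /*
          * Local tracing driver: set up the kernel-side trace buffers,
          * start one tracer thread per CPU, and only begin actual tracing
          * once every thread is up; otherwise back out. An alarm(2) is
          * armed if the user requested a stop watch.
          */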
   2678 static int run_tracers(void)
   2679 {
   2680 	atexit(exit_tracing);
   2681 	if (net_mode == Net_client)
   2682 		printf("blktrace: connecting to %s\n", hostname);
   2683 
   2684 	if (setup_buts())
   2685 		return 1;
   2686 
   2687 	if (use_tracer_devpaths()) {
   2688 		if (setup_tracer_devpaths())
   2689 			return 1;
   2690 
   2691 		if (piped_output)
   2692 			handle_list = handle_list_file;
   2693 		else
   2694 			handle_list = handle_list_net;
   2695 	}
   2696 
   2697 	start_tracers();
   2698 	if (nthreads_running == ncpus) {
   2699 		unblock_tracers();
   2700 		start_buts();
   2701 		if (net_mode == Net_client)
   2702 			printf("blktrace: connected!\n");
   2703 		if (stop_watch)
   2704 			alarm(stop_watch);
   2705 	} else
   2706 		stop_tracers();
   2707 
   2708 	wait_tracers();
   2709 	if (nthreads_running == ncpus)
   2710 		show_stats(&devpaths);
   2711 	if (net_client_use_send())
   2712 		close_client_connections();
   2713 	del_tracers();
   2714 
   2715 	return 0;
   2716 }
   2717 
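         /*
          * Build a cpu_set_t of online CPUs by parsing the kernel's range
          * list in /sys/devices/system/cpu/online. For example, the text
          * "0-2,4" yields cpu_nums = {0, 1, 2, 4}, ncpu = 4 and
          * max_cpus = 5: the set is sized by highest CPU number plus one,
          * which is why a simple CPU count is not enough.
          */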
   2718 static cpu_set_t *get_online_cpus(void)
   2719 {
   2720 	FILE *cpus;
   2721 	cpu_set_t *set;
   2722 	size_t alloc_size;
   2723 	int cpuid, prevcpuid = -1;
   2724 	char nextch;
   2725 	int n, ncpu, curcpu = 0;
   2726 	int *cpu_nums;
   2727 
   2728 	ncpu = sysconf(_SC_NPROCESSORS_CONF);
   2729 	if (ncpu < 0)
   2730 		return NULL;
   2731 
   2732 	cpu_nums = malloc(sizeof(int)*ncpu);
   2733 	if (!cpu_nums) {
   2734 		errno = ENOMEM;
   2735 		return NULL;
   2736 	}
   2737 
   2738 	/*
   2739 	 * There is no way to easily get maximum CPU number. So we have to
   2740 	 * parse the file first to find it out and then create appropriate
   2741 	 * cpuset
   2742 	 */
    2743 	cpus = my_fopen("/sys/devices/system/cpu/online", "r");
         	if (!cpus) {
         		free(cpu_nums);
         		return NULL;
         	}
    2744 	for (;;) {
    2745 		n = fscanf(cpus, "%d%c", &cpuid, &nextch);
    2746 		if (n <= 0)
    2747 			break;
    2748 		if (n == 2 && nextch == '-') {
    2749 			prevcpuid = cpuid;
    2750 			continue;
    2751 		}
    2752 		if (prevcpuid == -1)
    2753 			prevcpuid = cpuid;
    2754 		while (prevcpuid <= cpuid) {
    2755 			/* More CPUs listed than configured? */
    2756 			if (curcpu >= ncpu) {
         				fclose(cpus);
         				free(cpu_nums);
    2757 				errno = EINVAL;
    2758 				return NULL;
    2759 			}
    2760 			cpu_nums[curcpu++] = prevcpuid++;
    2761 		}
    2762 		prevcpuid = -1;
    2763 	}
    2764 	fclose(cpus);
   2765 
    2766 	ncpu = curcpu;
         	if (ncpu == 0) {
         		free(cpu_nums);
         		errno = EINVAL;
         		return NULL;
         	}
    2767 	max_cpus = cpu_nums[ncpu - 1] + 1;
   2768 
   2769 	/* Now that we have maximum cpu number, create a cpuset */
   2770 	set = CPU_ALLOC(max_cpus);
    2771 	if (!set) {
         		free(cpu_nums);
    2772 		errno = ENOMEM;
    2773 		return NULL;
    2774 	}
   2775 	alloc_size = CPU_ALLOC_SIZE(max_cpus);
   2776 	CPU_ZERO_S(alloc_size, set);
   2777 
   2778 	for (curcpu = 0; curcpu < ncpu; curcpu++)
   2779 		CPU_SET_S(cpu_nums[curcpu], alloc_size, set);
   2780 
   2781 	free(cpu_nums);
   2782 
   2783 	return set;
   2784 }
   2785 
   2786 int main(int argc, char *argv[])
   2787 {
   2788 	int ret = 0;
   2789 
   2790 	setlocale(LC_NUMERIC, "en_US");
   2791 	pagesize = getpagesize();
   2792 	online_cpus = get_online_cpus();
   2793 	if (!online_cpus) {
   2794 		fprintf(stderr, "cannot get online cpus %d/%s\n",
   2795 			errno, strerror(errno));
   2796 		ret = 1;
   2797 		goto out;
   2798 	} else if (handle_args(argc, argv)) {
   2799 		ret = 1;
   2800 		goto out;
   2801 	}
   2802 
   2803 	ncpus = CPU_COUNT_S(CPU_ALLOC_SIZE(max_cpus), online_cpus);
   2804 	if (ndevs > 1 && output_name && strcmp(output_name, "-") != 0) {
   2805 		fprintf(stderr, "-o not supported with multiple devices\n");
   2806 		ret = 1;
   2807 		goto out;
   2808 	}
   2809 
   2810 	signal(SIGINT, handle_sigint);
   2811 	signal(SIGHUP, handle_sigint);
   2812 	signal(SIGTERM, handle_sigint);
   2813 	signal(SIGALRM, handle_sigint);
   2814 	signal(SIGPIPE, SIG_IGN);
   2815 
   2816 	if (kill_running_trace) {
   2817 		struct devpath *dpp;
   2818 		struct list_head *p;
   2819 
   2820 		__list_for_each(p, &devpaths) {
   2821 			dpp = list_entry(p, struct devpath, head);
   2822 			if (__stop_trace(dpp->fd)) {
   2823 				fprintf(stderr,
   2824 					"BLKTRACETEARDOWN %s failed: %d/%s\n",
   2825 					dpp->path, errno, strerror(errno));
   2826 			}
   2827 		}
   2828 	} else if (net_mode == Net_server) {
   2829 		if (output_name) {
   2830 			fprintf(stderr, "-o ignored in server mode\n");
   2831 			output_name = NULL;
   2832 		}
   2833 		ret = net_server();
   2834 	} else
   2835 		ret = run_tracers();
   2836 
   2837 out:
   2838 	if (pfp)
   2839 		fclose(pfp);
   2840 	rel_devpaths();
   2841 	return ret;
   2842 }
   2843