1 /* 2 * block queue tracing application 3 * 4 * Copyright (C) 2005 Jens Axboe <axboe (at) suse.de> 5 * Copyright (C) 2006 Jens Axboe <axboe (at) kernel.dk> 6 * 7 * Rewrite to have a single thread per CPU (managing all devices on that CPU) 8 * Alan D. Brunelle <alan.brunelle (at) hp.com> - January 2009 9 * 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of the GNU General Public License as published by 12 * the Free Software Foundation; either version 2 of the License, or 13 * (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 * 24 */ 25 26 #include <errno.h> 27 #include <stdarg.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <fcntl.h> 32 #include <getopt.h> 33 #include <sched.h> 34 #include <unistd.h> 35 #include <poll.h> 36 #include <signal.h> 37 #include <pthread.h> 38 #include <locale.h> 39 #include <sys/ioctl.h> 40 #include <sys/types.h> 41 #include <sys/stat.h> 42 #include <sys/vfs.h> 43 #include <sys/mman.h> 44 #include <sys/param.h> 45 #include <sys/time.h> 46 #include <sys/resource.h> 47 #include <sys/socket.h> 48 #include <netinet/in.h> 49 #include <arpa/inet.h> 50 #include <netdb.h> 51 #include <sys/sendfile.h> 52 53 #include "btt/list.h" 54 #include "blktrace.h" 55 56 /* 57 * You may want to increase this even more, if you are logging at a high 58 * rate and see skipped/missed events 59 */ 60 #define BUF_SIZE (512 * 1024) 61 #define BUF_NR (4) 62 63 #define FILE_VBUF_SIZE (128 * 1024) 64 65 #define DEBUGFS_TYPE (0x64626720) 66 #define TRACE_NET_PORT (8462) 67 68 enum { 69 Net_none = 0, 70 Net_server, 71 Net_client, 72 }; 73 74 enum thread_status { 75 Th_running, 76 Th_leaving, 77 Th_error 78 }; 79 80 /* 81 * Generic stats collected: nevents can be _roughly_ estimated by data_read 82 * (discounting pdu...) 83 * 84 * These fields are updated w/ pdc_dr_update & pdc_nev_update below. 85 */ 86 struct pdc_stats { 87 unsigned long long data_read; 88 unsigned long long nevents; 89 }; 90 91 struct devpath { 92 struct list_head head; 93 char *path; /* path to device special file */ 94 char *buts_name; /* name returned from bt kernel code */ 95 struct pdc_stats *stats; 96 int fd, ncpus; 97 unsigned long long drops; 98 99 /* 100 * For piped output only: 101 * 102 * Each tracer will have a tracer_devpath_head that it will add new 103 * data onto. It's list is protected above (tracer_devpath_head.mutex) 104 * and it will signal the processing thread using the dp_cond, 105 * dp_mutex & dp_entries variables above. 106 */ 107 struct tracer_devpath_head *heads; 108 109 /* 110 * For network server mode only: 111 */ 112 struct cl_host *ch; 113 u32 cl_id; 114 time_t cl_connect_time; 115 struct io_info *ios; 116 }; 117 118 /* 119 * For piped output to stdout we will have each tracer thread (one per dev) 120 * tack buffers read from the relay queues on a per-device list. 121 * 122 * The main thread will then collect trace buffers from each of lists in turn. 123 * 124 * We will use a mutex to guard each of the trace_buf list. The tracers 125 * can then signal the main thread using <dp_cond,dp_mutex> and 126 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will 127 * signal. When dp_entries is 0, the main thread will wait for that condition 128 * to be signalled.) 129 * 130 * adb: It may be better just to have a large buffer per tracer per dev, 131 * and then use it as a ring-buffer. This would certainly cut down a lot 132 * of malloc/free thrashing, at the cost of more memory movements (potentially). 133 */ 134 struct trace_buf { 135 struct list_head head; 136 struct devpath *dpp; 137 void *buf; 138 int cpu, len; 139 }; 140 141 struct tracer_devpath_head { 142 pthread_mutex_t mutex; 143 struct list_head head; 144 struct trace_buf *prev; 145 }; 146 147 /* 148 * Used to handle the mmap() interfaces for output file (containing traces) 149 */ 150 struct mmap_info { 151 void *fs_buf; 152 unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len; 153 unsigned long buf_size, buf_nr; 154 int pagesize; 155 }; 156 157 /* 158 * Each thread doing work on a (client) side of blktrace will have one 159 * of these. The ios array contains input/output information, pfds holds 160 * poll() data. The volatile's provide flags to/from the main executing 161 * thread. 162 */ 163 struct tracer { 164 struct list_head head; 165 struct io_info *ios; 166 struct pollfd *pfds; 167 pthread_t thread; 168 int cpu, nios; 169 volatile int status, is_done; 170 }; 171 172 /* 173 * networking stuff follows. we include a magic number so we know whether 174 * to endianness convert or not. 175 * 176 * The len field is overloaded: 177 * 0 - Indicates an "open" - allowing the server to set up for a dev/cpu 178 * 1 - Indicates a "close" - Shut down connection orderly 179 * 180 * The cpu field is overloaded on close: it will contain the number of drops. 181 */ 182 struct blktrace_net_hdr { 183 u32 magic; /* same as trace magic */ 184 char buts_name[32]; /* trace name */ 185 u32 cpu; /* for which cpu */ 186 u32 max_cpus; 187 u32 len; /* length of following trace data */ 188 u32 cl_id; /* id for set of client per-cpu connections */ 189 u32 buf_size; /* client buf_size for this trace */ 190 u32 buf_nr; /* client buf_nr for this trace */ 191 u32 page_size; /* client page_size for this trace */ 192 }; 193 194 /* 195 * Each host encountered has one of these. The head is used to link this 196 * on to the network server's ch_list. Connections associated with this 197 * host are linked on conn_list, and any devices traced on that host 198 * are connected on the devpaths list. 199 */ 200 struct cl_host { 201 struct list_head head; 202 struct list_head conn_list; 203 struct list_head devpaths; 204 struct net_server_s *ns; 205 char *hostname; 206 struct in_addr cl_in_addr; 207 int connects, ndevs, cl_opens; 208 }; 209 210 /* 211 * Each connection (client to server socket ('fd')) has one of these. A 212 * back reference to the host ('ch'), and lists headers (for the host 213 * list, and the network server conn_list) are also included. 214 */ 215 struct cl_conn { 216 struct list_head ch_head, ns_head; 217 struct cl_host *ch; 218 int fd, ncpus; 219 time_t connect_time; 220 }; 221 222 /* 223 * The network server requires some poll structures to be maintained - 224 * one per conection currently on conn_list. The nchs/ch_list values 225 * are for each host connected to this server. The addr field is used 226 * for scratch as new connections are established. 227 */ 228 struct net_server_s { 229 struct list_head conn_list; 230 struct list_head ch_list; 231 struct pollfd *pfds; 232 int listen_fd, connects, nchs; 233 struct sockaddr_in addr; 234 }; 235 236 /* 237 * This structure is (generically) used to providide information 238 * for a read-to-write set of values. 239 * 240 * ifn & ifd represent input information 241 * 242 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally). 243 */ 244 struct io_info { 245 struct devpath *dpp; 246 FILE *ofp; 247 char *obuf; 248 struct cl_conn *nc; /* Server network connection */ 249 250 /* 251 * mmap controlled output files 252 */ 253 struct mmap_info mmap_info; 254 255 /* 256 * Client network fields 257 */ 258 unsigned int ready; 259 unsigned long long data_queued; 260 261 /* 262 * Input/output file descriptors & names 263 */ 264 int ifd, ofd; 265 char ifn[MAXPATHLEN + 64]; 266 char ofn[MAXPATHLEN + 64]; 267 }; 268 269 static char blktrace_version[] = "2.0.0"; 270 271 /* 272 * Linkage to blktrace helper routines (trace conversions) 273 */ 274 int data_is_native = -1; 275 276 static int ndevs; 277 static int max_cpus; 278 static int ncpus; 279 static cpu_set_t *online_cpus; 280 static int pagesize; 281 static int act_mask = ~0U; 282 static int kill_running_trace; 283 static int stop_watch; 284 static int piped_output; 285 286 static char *debugfs_path = "/sys/kernel/debug"; 287 static char *output_name; 288 static char *output_dir; 289 290 static unsigned long buf_size = BUF_SIZE; 291 static unsigned long buf_nr = BUF_NR; 292 293 static FILE *pfp; 294 295 static LIST_HEAD(devpaths); 296 static LIST_HEAD(tracers); 297 298 static volatile int done; 299 300 /* 301 * tracer threads add entries, the main thread takes them off and processes 302 * them. These protect the dp_entries variable. 303 */ 304 static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER; 305 static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER; 306 static volatile int dp_entries; 307 308 /* 309 * These synchronize master / thread interactions. 310 */ 311 static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER; 312 static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER; 313 static volatile int nthreads_running; 314 static volatile int nthreads_leaving; 315 static volatile int nthreads_error; 316 static volatile int tracers_run; 317 318 /* 319 * network cmd line params 320 */ 321 static struct sockaddr_in hostname_addr; 322 static char hostname[MAXHOSTNAMELEN]; 323 static int net_port = TRACE_NET_PORT; 324 static int net_use_sendfile = 1; 325 static int net_mode; 326 static int *cl_fds; 327 328 static int (*handle_pfds)(struct tracer *, int, int); 329 static int (*handle_list)(struct tracer_devpath_head *, struct list_head *); 330 331 #define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:" 332 static struct option l_opts[] = { 333 { 334 .name = "dev", 335 .has_arg = required_argument, 336 .flag = NULL, 337 .val = 'd' 338 }, 339 { 340 .name = "input-devs", 341 .has_arg = required_argument, 342 .flag = NULL, 343 .val = 'I' 344 }, 345 { 346 .name = "act-mask", 347 .has_arg = required_argument, 348 .flag = NULL, 349 .val = 'a' 350 }, 351 { 352 .name = "set-mask", 353 .has_arg = required_argument, 354 .flag = NULL, 355 .val = 'A' 356 }, 357 { 358 .name = "relay", 359 .has_arg = required_argument, 360 .flag = NULL, 361 .val = 'r' 362 }, 363 { 364 .name = "output", 365 .has_arg = required_argument, 366 .flag = NULL, 367 .val = 'o' 368 }, 369 { 370 .name = "kill", 371 .has_arg = no_argument, 372 .flag = NULL, 373 .val = 'k' 374 }, 375 { 376 .name = "stopwatch", 377 .has_arg = required_argument, 378 .flag = NULL, 379 .val = 'w' 380 }, 381 { 382 .name = "version", 383 .has_arg = no_argument, 384 .flag = NULL, 385 .val = 'v' 386 }, 387 { 388 .name = "version", 389 .has_arg = no_argument, 390 .flag = NULL, 391 .val = 'V' 392 }, 393 { 394 .name = "buffer-size", 395 .has_arg = required_argument, 396 .flag = NULL, 397 .val = 'b' 398 }, 399 { 400 .name = "num-sub-buffers", 401 .has_arg = required_argument, 402 .flag = NULL, 403 .val = 'n' 404 }, 405 { 406 .name = "output-dir", 407 .has_arg = required_argument, 408 .flag = NULL, 409 .val = 'D' 410 }, 411 { 412 .name = "listen", 413 .has_arg = no_argument, 414 .flag = NULL, 415 .val = 'l' 416 }, 417 { 418 .name = "host", 419 .has_arg = required_argument, 420 .flag = NULL, 421 .val = 'h' 422 }, 423 { 424 .name = "port", 425 .has_arg = required_argument, 426 .flag = NULL, 427 .val = 'p' 428 }, 429 { 430 .name = "no-sendfile", 431 .has_arg = no_argument, 432 .flag = NULL, 433 .val = 's' 434 }, 435 { 436 .name = NULL, 437 } 438 }; 439 440 static char usage_str[] = "\n\n" \ 441 "-d <dev> | --dev=<dev>\n" \ 442 "[ -r <debugfs path> | --relay=<debugfs path> ]\n" \ 443 "[ -o <file> | --output=<file>]\n" \ 444 "[ -D <dir> | --output-dir=<dir>\n" \ 445 "[ -w <time> | --stopwatch=<time>]\n" \ 446 "[ -a <action field> | --act-mask=<action field>]\n" \ 447 "[ -A <action mask> | --set-mask=<action mask>]\n" \ 448 "[ -b <size> | --buffer-size]\n" \ 449 "[ -n <number> | --num-sub-buffers=<number>]\n" \ 450 "[ -l | --listen]\n" \ 451 "[ -h <hostname> | --host=<hostname>]\n" \ 452 "[ -p <port number> | --port=<port number>]\n" \ 453 "[ -s | --no-sendfile]\n" \ 454 "[ -I <devs file> | --input-devs=<devs file>]\n" \ 455 "[ -v <version> | --version]\n" \ 456 "[ -V <version> | --version]\n" \ 457 458 "\t-d Use specified device. May also be given last after options\n" \ 459 "\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \ 460 "\t-o File(s) to send output to\n" \ 461 "\t-D Directory to prepend to output file names\n" \ 462 "\t-w Stop after defined time, in seconds\n" \ 463 "\t-a Only trace specified actions. See documentation\n" \ 464 "\t-A Give trace mask as a single value. See documentation\n" \ 465 "\t-b Sub buffer size in KiB (default 512)\n" \ 466 "\t-n Number of sub buffers (default 4)\n" \ 467 "\t-l Run in network listen mode (blktrace server)\n" \ 468 "\t-h Run in network client mode, connecting to the given host\n" \ 469 "\t-p Network port to use (default 8462)\n" \ 470 "\t-s Make the network client NOT use sendfile() to transfer data\n" \ 471 "\t-I Add devices found in <devs file>\n" \ 472 "\t-v Print program version info\n" \ 473 "\t-V Print program version info\n\n"; 474 475 static void clear_events(struct pollfd *pfd) 476 { 477 pfd->events = 0; 478 pfd->revents = 0; 479 } 480 481 static inline int net_client_use_sendfile(void) 482 { 483 return net_mode == Net_client && net_use_sendfile; 484 } 485 486 static inline int net_client_use_send(void) 487 { 488 return net_mode == Net_client && !net_use_sendfile; 489 } 490 491 static inline int use_tracer_devpaths(void) 492 { 493 return piped_output || net_client_use_send(); 494 } 495 496 static inline int in_addr_eq(struct in_addr a, struct in_addr b) 497 { 498 return a.s_addr == b.s_addr; 499 } 500 501 static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read) 502 { 503 dpp->stats[cpu].data_read += data_read; 504 } 505 506 static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents) 507 { 508 dpp->stats[cpu].nevents += nevents; 509 } 510 511 static void show_usage(char *prog) 512 { 513 fprintf(stderr, "Usage: %s %s", prog, usage_str); 514 } 515 516 /* 517 * Create a timespec 'msec' milliseconds into the future 518 */ 519 static inline void make_timespec(struct timespec *tsp, long delta_msec) 520 { 521 struct timeval now; 522 523 gettimeofday(&now, NULL); 524 tsp->tv_sec = now.tv_sec; 525 tsp->tv_nsec = 1000L * now.tv_usec; 526 527 tsp->tv_nsec += (delta_msec * 1000000L); 528 if (tsp->tv_nsec > 1000000000L) { 529 long secs = tsp->tv_nsec / 1000000000L; 530 531 tsp->tv_sec += secs; 532 tsp->tv_nsec -= (secs * 1000000000L); 533 } 534 } 535 536 /* 537 * Add a timer to ensure wait ends 538 */ 539 static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) 540 { 541 struct timespec ts; 542 543 make_timespec(&ts, 50); 544 pthread_cond_timedwait(cond, mutex, &ts); 545 } 546 547 static void unblock_tracers(void) 548 { 549 pthread_mutex_lock(&mt_mutex); 550 tracers_run = 1; 551 pthread_cond_broadcast(&mt_cond); 552 pthread_mutex_unlock(&mt_mutex); 553 } 554 555 static void tracer_wait_unblock(struct tracer *tp) 556 { 557 pthread_mutex_lock(&mt_mutex); 558 while (!tp->is_done && !tracers_run) 559 pthread_cond_wait(&mt_cond, &mt_mutex); 560 pthread_mutex_unlock(&mt_mutex); 561 } 562 563 static void tracer_signal_ready(struct tracer *tp, 564 enum thread_status th_status, 565 int status) 566 { 567 pthread_mutex_lock(&mt_mutex); 568 tp->status = status; 569 570 if (th_status == Th_running) 571 nthreads_running++; 572 else if (th_status == Th_error) 573 nthreads_error++; 574 else 575 nthreads_leaving++; 576 577 pthread_cond_signal(&mt_cond); 578 pthread_mutex_unlock(&mt_mutex); 579 } 580 581 static void wait_tracers_ready(int ncpus_started) 582 { 583 pthread_mutex_lock(&mt_mutex); 584 while ((nthreads_running + nthreads_error) < ncpus_started) 585 t_pthread_cond_wait(&mt_cond, &mt_mutex); 586 pthread_mutex_unlock(&mt_mutex); 587 } 588 589 static void wait_tracers_leaving(void) 590 { 591 pthread_mutex_lock(&mt_mutex); 592 while (nthreads_leaving < nthreads_running) 593 t_pthread_cond_wait(&mt_cond, &mt_mutex); 594 pthread_mutex_unlock(&mt_mutex); 595 } 596 597 static void init_mmap_info(struct mmap_info *mip) 598 { 599 mip->buf_size = buf_size; 600 mip->buf_nr = buf_nr; 601 mip->pagesize = pagesize; 602 } 603 604 static void net_close_connection(int *fd) 605 { 606 shutdown(*fd, SHUT_RDWR); 607 close(*fd); 608 *fd = -1; 609 } 610 611 static void dpp_free(struct devpath *dpp) 612 { 613 if (dpp->stats) 614 free(dpp->stats); 615 if (dpp->ios) 616 free(dpp->ios); 617 if (dpp->path) 618 free(dpp->path); 619 if (dpp->buts_name) 620 free(dpp->buts_name); 621 free(dpp); 622 } 623 624 static int lock_on_cpu(int cpu) 625 { 626 cpu_set_t * cpu_mask; 627 size_t size; 628 629 cpu_mask = CPU_ALLOC(max_cpus); 630 size = CPU_ALLOC_SIZE(max_cpus); 631 632 CPU_ZERO_S(size, cpu_mask); 633 CPU_SET_S(cpu, size, cpu_mask); 634 if (sched_setaffinity(0, size, cpu_mask) < 0) { 635 CPU_FREE(cpu_mask); 636 return errno; 637 } 638 639 CPU_FREE(cpu_mask); 640 return 0; 641 } 642 643 static int increase_limit(int resource, rlim_t increase) 644 { 645 struct rlimit rlim; 646 int save_errno = errno; 647 648 if (!getrlimit(resource, &rlim)) { 649 rlim.rlim_cur += increase; 650 if (rlim.rlim_cur >= rlim.rlim_max) 651 rlim.rlim_max = rlim.rlim_cur + increase; 652 653 if (!setrlimit(resource, &rlim)) 654 return 1; 655 } 656 657 errno = save_errno; 658 return 0; 659 } 660 661 static int handle_open_failure(void) 662 { 663 if (errno == ENFILE || errno == EMFILE) 664 return increase_limit(RLIMIT_NOFILE, 16); 665 return 0; 666 } 667 668 static int handle_mem_failure(size_t length) 669 { 670 if (errno == ENFILE) 671 return handle_open_failure(); 672 else if (errno == ENOMEM) 673 return increase_limit(RLIMIT_MEMLOCK, 2 * length); 674 return 0; 675 } 676 677 static FILE *my_fopen(const char *path, const char *mode) 678 { 679 FILE *fp; 680 681 do { 682 fp = fopen(path, mode); 683 } while (fp == NULL && handle_open_failure()); 684 685 return fp; 686 } 687 688 static int my_open(const char *path, int flags) 689 { 690 int fd; 691 692 do { 693 fd = open(path, flags); 694 } while (fd < 0 && handle_open_failure()); 695 696 return fd; 697 } 698 699 static int my_socket(int domain, int type, int protocol) 700 { 701 int fd; 702 703 do { 704 fd = socket(domain, type, protocol); 705 } while (fd < 0 && handle_open_failure()); 706 707 return fd; 708 } 709 710 static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) 711 { 712 int fd; 713 714 do { 715 fd = accept(sockfd, addr, addrlen); 716 } while (fd < 0 && handle_open_failure()); 717 718 return fd; 719 } 720 721 static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd, 722 off_t offset) 723 { 724 void *new; 725 726 do { 727 new = mmap(addr, length, prot, flags, fd, offset); 728 } while (new == MAP_FAILED && handle_mem_failure(length)); 729 730 return new; 731 } 732 733 static int my_mlock(struct tracer *tp, 734 const void *addr, size_t len) 735 { 736 int ret, retry = 0; 737 738 do { 739 ret = mlock(addr, len); 740 if ((retry >= 10) && tp && tp->is_done) 741 break; 742 retry++; 743 } while (ret < 0 && handle_mem_failure(len)); 744 745 return ret; 746 } 747 748 static int setup_mmap(int fd, unsigned int maxlen, 749 struct mmap_info *mip, 750 struct tracer *tp) 751 { 752 if (mip->fs_off + maxlen > mip->fs_buf_len) { 753 unsigned long nr = max(16, mip->buf_nr); 754 755 if (mip->fs_buf) { 756 munlock(mip->fs_buf, mip->fs_buf_len); 757 munmap(mip->fs_buf, mip->fs_buf_len); 758 mip->fs_buf = NULL; 759 } 760 761 mip->fs_off = mip->fs_size & (mip->pagesize - 1); 762 mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off; 763 mip->fs_max_size += mip->fs_buf_len; 764 765 if (ftruncate(fd, mip->fs_max_size) < 0) { 766 perror("setup_mmap: ftruncate"); 767 return 1; 768 } 769 770 mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE, 771 MAP_SHARED, fd, 772 mip->fs_size - mip->fs_off); 773 if (mip->fs_buf == MAP_FAILED) { 774 perror("setup_mmap: mmap"); 775 return 1; 776 } 777 if (my_mlock(tp, mip->fs_buf, mip->fs_buf_len) < 0) { 778 perror("setup_mlock: mlock"); 779 return 1; 780 } 781 } 782 783 return 0; 784 } 785 786 static int __stop_trace(int fd) 787 { 788 /* 789 * Should be stopped, don't complain if it isn't 790 */ 791 ioctl(fd, BLKTRACESTOP); 792 return ioctl(fd, BLKTRACETEARDOWN); 793 } 794 795 static int write_data(char *buf, int len) 796 { 797 int ret; 798 799 rewrite: 800 ret = fwrite(buf, len, 1, pfp); 801 if (ferror(pfp) || ret != 1) { 802 if (errno == EINTR) { 803 clearerr(pfp); 804 goto rewrite; 805 } 806 807 if (!piped_output || (errno != EPIPE && errno != EBADF)) { 808 fprintf(stderr, "write(%d) failed: %d/%s\n", 809 len, errno, strerror(errno)); 810 } 811 goto err; 812 } 813 814 fflush(pfp); 815 return 0; 816 817 err: 818 clearerr(pfp); 819 return 1; 820 } 821 822 /* 823 * Returns the number of bytes read (successfully) 824 */ 825 static int __net_recv_data(int fd, void *buf, unsigned int len) 826 { 827 unsigned int bytes_left = len; 828 829 while (bytes_left && !done) { 830 int ret = recv(fd, buf, bytes_left, MSG_WAITALL); 831 832 if (ret == 0) 833 break; 834 else if (ret < 0) { 835 if (errno == EAGAIN) { 836 usleep(50); 837 continue; 838 } 839 perror("server: net_recv_data: recv failed"); 840 break; 841 } else { 842 buf += ret; 843 bytes_left -= ret; 844 } 845 } 846 847 return len - bytes_left; 848 } 849 850 static int net_recv_data(int fd, void *buf, unsigned int len) 851 { 852 return __net_recv_data(fd, buf, len); 853 } 854 855 /* 856 * Returns number of bytes written 857 */ 858 static int net_send_data(int fd, void *buf, unsigned int buf_len) 859 { 860 int ret; 861 unsigned int bytes_left = buf_len; 862 863 while (bytes_left) { 864 ret = send(fd, buf, bytes_left, 0); 865 if (ret < 0) { 866 perror("send"); 867 break; 868 } 869 870 buf += ret; 871 bytes_left -= ret; 872 } 873 874 return buf_len - bytes_left; 875 } 876 877 static int net_send_header(int fd, int cpu, char *buts_name, int len) 878 { 879 struct blktrace_net_hdr hdr; 880 881 memset(&hdr, 0, sizeof(hdr)); 882 883 hdr.magic = BLK_IO_TRACE_MAGIC; 884 memset(hdr.buts_name, 0, sizeof(hdr.buts_name)); 885 strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name)); 886 hdr.buts_name[sizeof(hdr.buts_name) - 1] = '\0'; 887 hdr.cpu = cpu; 888 hdr.max_cpus = max_cpus; 889 hdr.len = len; 890 hdr.cl_id = getpid(); 891 hdr.buf_size = buf_size; 892 hdr.buf_nr = buf_nr; 893 hdr.page_size = pagesize; 894 895 return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr); 896 } 897 898 static void net_send_open_close(int fd, int cpu, char *buts_name, int len) 899 { 900 struct blktrace_net_hdr ret_hdr; 901 902 net_send_header(fd, cpu, buts_name, len); 903 net_recv_data(fd, &ret_hdr, sizeof(ret_hdr)); 904 } 905 906 static void net_send_open(int fd, int cpu, char *buts_name) 907 { 908 net_send_open_close(fd, cpu, buts_name, 0); 909 } 910 911 static void net_send_close(int fd, char *buts_name, int drops) 912 { 913 /* 914 * Overload CPU w/ number of drops 915 * 916 * XXX: Need to clear/set done around call - done=1 (which 917 * is true here) stops reads from happening... :-( 918 */ 919 done = 0; 920 net_send_open_close(fd, drops, buts_name, 1); 921 done = 1; 922 } 923 924 static void ack_open_close(int fd, char *buts_name) 925 { 926 net_send_header(fd, 0, buts_name, 2); 927 } 928 929 static void net_send_drops(int fd) 930 { 931 struct list_head *p; 932 933 __list_for_each(p, &devpaths) { 934 struct devpath *dpp = list_entry(p, struct devpath, head); 935 936 net_send_close(fd, dpp->buts_name, dpp->drops); 937 } 938 } 939 940 /* 941 * Returns: 942 * 0: "EOF" 943 * 1: OK 944 * -1: Error 945 */ 946 static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh) 947 { 948 int bytes_read; 949 int fl = fcntl(nc->fd, F_GETFL); 950 951 fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK); 952 bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh)); 953 fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK); 954 955 if (bytes_read == sizeof(*bnh)) 956 return 1; 957 else if (bytes_read == 0) 958 return 0; 959 else 960 return -1; 961 } 962 963 static int net_setup_addr(void) 964 { 965 struct sockaddr_in *addr = &hostname_addr; 966 967 memset(addr, 0, sizeof(*addr)); 968 addr->sin_family = AF_INET; 969 addr->sin_port = htons(net_port); 970 971 if (inet_aton(hostname, &addr->sin_addr) != 1) { 972 struct hostent *hent; 973 retry: 974 hent = gethostbyname(hostname); 975 if (!hent) { 976 if (h_errno == TRY_AGAIN) { 977 usleep(100); 978 goto retry; 979 } else if (h_errno == NO_RECOVERY) { 980 fprintf(stderr, "gethostbyname(%s)" 981 "non-recoverable error encountered\n", 982 hostname); 983 } else { 984 /* 985 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA 986 */ 987 fprintf(stderr, "Host %s not found\n", 988 hostname); 989 } 990 return 1; 991 } 992 993 memcpy(&addr->sin_addr, hent->h_addr, 4); 994 memset(hostname, 0, sizeof(hostname)); 995 strncpy(hostname, hent->h_name, sizeof(hostname)); 996 hostname[sizeof(hostname) - 1] = '\0'; 997 } 998 999 return 0; 1000 } 1001 1002 static int net_setup_client(void) 1003 { 1004 int fd; 1005 struct sockaddr_in *addr = &hostname_addr; 1006 1007 fd = my_socket(AF_INET, SOCK_STREAM, 0); 1008 if (fd < 0) { 1009 perror("client: socket"); 1010 return -1; 1011 } 1012 1013 if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) { 1014 if (errno == ECONNREFUSED) 1015 fprintf(stderr, 1016 "\nclient: Connection to %s refused, " 1017 "perhaps the server is not started?\n\n", 1018 hostname); 1019 else 1020 perror("client: connect"); 1021 1022 close(fd); 1023 return -1; 1024 } 1025 1026 return fd; 1027 } 1028 1029 static int open_client_connections(void) 1030 { 1031 int cpu; 1032 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus); 1033 1034 cl_fds = calloc(ncpus, sizeof(*cl_fds)); 1035 for (cpu = 0; cpu < max_cpus; cpu++) { 1036 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus)) 1037 continue; 1038 cl_fds[cpu] = net_setup_client(); 1039 if (cl_fds[cpu] < 0) 1040 goto err; 1041 } 1042 return 0; 1043 1044 err: 1045 while (cpu > 0) 1046 close(cl_fds[cpu--]); 1047 free(cl_fds); 1048 return 1; 1049 } 1050 1051 static void close_client_connections(void) 1052 { 1053 if (cl_fds) { 1054 int cpu, *fdp; 1055 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus); 1056 1057 for (cpu = 0, fdp = cl_fds; cpu < max_cpus; cpu++, fdp++) { 1058 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus)) 1059 continue; 1060 if (*fdp >= 0) { 1061 net_send_drops(*fdp); 1062 net_close_connection(fdp); 1063 } 1064 } 1065 free(cl_fds); 1066 } 1067 } 1068 1069 static int setup_buts(void) 1070 { 1071 struct list_head *p; 1072 int ret = 0; 1073 1074 __list_for_each(p, &devpaths) { 1075 struct blk_user_trace_setup buts; 1076 struct devpath *dpp = list_entry(p, struct devpath, head); 1077 1078 memset(&buts, 0, sizeof(buts)); 1079 buts.buf_size = buf_size; 1080 buts.buf_nr = buf_nr; 1081 buts.act_mask = act_mask; 1082 1083 if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) { 1084 dpp->ncpus = max_cpus; 1085 dpp->buts_name = strdup(buts.name); 1086 if (dpp->stats) 1087 free(dpp->stats); 1088 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats)); 1089 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats)); 1090 } else { 1091 fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n", 1092 dpp->path, errno, strerror(errno)); 1093 ret++; 1094 } 1095 } 1096 1097 return ret; 1098 } 1099 1100 static void start_buts(void) 1101 { 1102 struct list_head *p; 1103 1104 __list_for_each(p, &devpaths) { 1105 struct devpath *dpp = list_entry(p, struct devpath, head); 1106 1107 if (ioctl(dpp->fd, BLKTRACESTART) < 0) { 1108 fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n", 1109 dpp->path, errno, strerror(errno)); 1110 } 1111 } 1112 } 1113 1114 static int get_drops(struct devpath *dpp) 1115 { 1116 int fd, drops = 0; 1117 char fn[MAXPATHLEN + 64], tmp[256]; 1118 1119 snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path, 1120 dpp->buts_name); 1121 1122 fd = my_open(fn, O_RDONLY); 1123 if (fd < 0) { 1124 /* 1125 * This may be ok: the kernel may not support 1126 * dropped counts. 1127 */ 1128 if (errno != ENOENT) 1129 fprintf(stderr, "Could not open %s: %d/%s\n", 1130 fn, errno, strerror(errno)); 1131 return 0; 1132 } else if (read(fd, tmp, sizeof(tmp)) < 0) { 1133 fprintf(stderr, "Could not read %s: %d/%s\n", 1134 fn, errno, strerror(errno)); 1135 } else 1136 drops = atoi(tmp); 1137 close(fd); 1138 1139 return drops; 1140 } 1141 1142 static void get_all_drops(void) 1143 { 1144 struct list_head *p; 1145 1146 __list_for_each(p, &devpaths) { 1147 struct devpath *dpp = list_entry(p, struct devpath, head); 1148 1149 dpp->drops = get_drops(dpp); 1150 } 1151 } 1152 1153 static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize) 1154 { 1155 struct trace_buf *tbp; 1156 1157 tbp = malloc(sizeof(*tbp) + bufsize); 1158 INIT_LIST_HEAD(&tbp->head); 1159 tbp->len = 0; 1160 tbp->buf = (void *)(tbp + 1); 1161 tbp->cpu = cpu; 1162 tbp->dpp = NULL; /* Will be set when tbp is added */ 1163 1164 return tbp; 1165 } 1166 1167 static void free_tracer_heads(struct devpath *dpp) 1168 { 1169 int cpu; 1170 struct tracer_devpath_head *hd; 1171 1172 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) { 1173 if (hd->prev) 1174 free(hd->prev); 1175 1176 pthread_mutex_destroy(&hd->mutex); 1177 } 1178 free(dpp->heads); 1179 } 1180 1181 static int setup_tracer_devpaths(void) 1182 { 1183 struct list_head *p; 1184 1185 if (net_client_use_send()) 1186 if (open_client_connections()) 1187 return 1; 1188 1189 __list_for_each(p, &devpaths) { 1190 int cpu; 1191 struct tracer_devpath_head *hd; 1192 struct devpath *dpp = list_entry(p, struct devpath, head); 1193 1194 dpp->heads = calloc(max_cpus, sizeof(struct tracer_devpath_head)); 1195 for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) { 1196 INIT_LIST_HEAD(&hd->head); 1197 pthread_mutex_init(&hd->mutex, NULL); 1198 hd->prev = NULL; 1199 } 1200 } 1201 1202 return 0; 1203 } 1204 1205 static inline void add_trace_buf(struct devpath *dpp, int cpu, 1206 struct trace_buf **tbpp) 1207 { 1208 struct trace_buf *tbp = *tbpp; 1209 struct tracer_devpath_head *hd = &dpp->heads[cpu]; 1210 1211 tbp->dpp = dpp; 1212 1213 pthread_mutex_lock(&hd->mutex); 1214 list_add_tail(&tbp->head, &hd->head); 1215 pthread_mutex_unlock(&hd->mutex); 1216 1217 *tbpp = alloc_trace_buf(cpu, buf_size); 1218 } 1219 1220 static inline void incr_entries(int entries_handled) 1221 { 1222 pthread_mutex_lock(&dp_mutex); 1223 if (dp_entries == 0) 1224 pthread_cond_signal(&dp_cond); 1225 dp_entries += entries_handled; 1226 pthread_mutex_unlock(&dp_mutex); 1227 } 1228 1229 static void decr_entries(int handled) 1230 { 1231 pthread_mutex_lock(&dp_mutex); 1232 dp_entries -= handled; 1233 pthread_mutex_unlock(&dp_mutex); 1234 } 1235 1236 static int wait_empty_entries(void) 1237 { 1238 pthread_mutex_lock(&dp_mutex); 1239 while (!done && dp_entries == 0) 1240 t_pthread_cond_wait(&dp_cond, &dp_mutex); 1241 pthread_mutex_unlock(&dp_mutex); 1242 1243 return !done; 1244 } 1245 1246 static int add_devpath(char *path) 1247 { 1248 int fd; 1249 struct devpath *dpp; 1250 struct list_head *p; 1251 1252 /* 1253 * Verify device is not duplicated 1254 */ 1255 __list_for_each(p, &devpaths) { 1256 struct devpath *tmp = list_entry(p, struct devpath, head); 1257 if (!strcmp(tmp->path, path)) 1258 return 0; 1259 } 1260 /* 1261 * Verify device is valid before going too far 1262 */ 1263 fd = my_open(path, O_RDONLY | O_NONBLOCK); 1264 if (fd < 0) { 1265 fprintf(stderr, "Invalid path %s specified: %d/%s\n", 1266 path, errno, strerror(errno)); 1267 return 1; 1268 } 1269 1270 dpp = malloc(sizeof(*dpp)); 1271 memset(dpp, 0, sizeof(*dpp)); 1272 dpp->path = strdup(path); 1273 dpp->fd = fd; 1274 ndevs++; 1275 list_add_tail(&dpp->head, &devpaths); 1276 1277 return 0; 1278 } 1279 1280 static void rel_devpaths(void) 1281 { 1282 struct list_head *p, *q; 1283 1284 list_for_each_safe(p, q, &devpaths) { 1285 struct devpath *dpp = list_entry(p, struct devpath, head); 1286 1287 list_del(&dpp->head); 1288 __stop_trace(dpp->fd); 1289 close(dpp->fd); 1290 1291 if (dpp->heads) 1292 free_tracer_heads(dpp); 1293 1294 dpp_free(dpp); 1295 ndevs--; 1296 } 1297 } 1298 1299 static int flush_subbuf_net(struct trace_buf *tbp) 1300 { 1301 int fd = cl_fds[tbp->cpu]; 1302 struct devpath *dpp = tbp->dpp; 1303 1304 if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len)) 1305 return 1; 1306 else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len) 1307 return 1; 1308 1309 return 0; 1310 } 1311 1312 static int 1313 handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd, 1314 struct list_head *list) 1315 { 1316 struct trace_buf *tbp; 1317 struct list_head *p, *q; 1318 int entries_handled = 0; 1319 1320 list_for_each_safe(p, q, list) { 1321 tbp = list_entry(p, struct trace_buf, head); 1322 1323 list_del(&tbp->head); 1324 entries_handled++; 1325 1326 if (cl_fds[tbp->cpu] >= 0) { 1327 if (flush_subbuf_net(tbp)) { 1328 close(cl_fds[tbp->cpu]); 1329 cl_fds[tbp->cpu] = -1; 1330 } 1331 } 1332 1333 free(tbp); 1334 } 1335 1336 return entries_handled; 1337 } 1338 1339 /* 1340 * Tack 'tbp's buf onto the tail of 'prev's buf 1341 */ 1342 static struct trace_buf *tb_combine(struct trace_buf *prev, 1343 struct trace_buf *tbp) 1344 { 1345 unsigned long tot_len; 1346 1347 tot_len = prev->len + tbp->len; 1348 if (tot_len > buf_size) { 1349 /* 1350 * tbp->head isn't connected (it was 'prev' 1351 * so it had been taken off of the list 1352 * before). Therefore, we can realloc 1353 * the whole structures, as the other fields 1354 * are "static". 1355 */ 1356 prev = realloc(prev, sizeof(*prev) + tot_len); 1357 prev->buf = (void *)(prev + 1); 1358 } 1359 1360 memcpy(prev->buf + prev->len, tbp->buf, tbp->len); 1361 prev->len = tot_len; 1362 1363 free(tbp); 1364 return prev; 1365 } 1366 1367 static int handle_list_file(struct tracer_devpath_head *hd, 1368 struct list_head *list) 1369 { 1370 int off, t_len, nevents; 1371 struct blk_io_trace *t; 1372 struct list_head *p, *q; 1373 int entries_handled = 0; 1374 struct trace_buf *tbp, *prev; 1375 1376 prev = hd->prev; 1377 list_for_each_safe(p, q, list) { 1378 tbp = list_entry(p, struct trace_buf, head); 1379 list_del(&tbp->head); 1380 entries_handled++; 1381 1382 /* 1383 * If there was some leftover before, tack this new 1384 * entry onto the tail of the previous one. 1385 */ 1386 if (prev) 1387 tbp = tb_combine(prev, tbp); 1388 1389 /* 1390 * See how many whole traces there are - send them 1391 * all out in one go. 1392 */ 1393 off = 0; 1394 nevents = 0; 1395 while (off + (int)sizeof(*t) <= tbp->len) { 1396 t = (struct blk_io_trace *)(tbp->buf + off); 1397 t_len = sizeof(*t) + t->pdu_len; 1398 if (off + t_len > tbp->len) 1399 break; 1400 1401 off += t_len; 1402 nevents++; 1403 } 1404 if (nevents) 1405 pdc_nev_update(tbp->dpp, tbp->cpu, nevents); 1406 1407 /* 1408 * Write any full set of traces, any remaining data is kept 1409 * for the next pass. 1410 */ 1411 if (off) { 1412 if (write_data(tbp->buf, off) || off == tbp->len) { 1413 free(tbp); 1414 prev = NULL; 1415 } 1416 else { 1417 /* 1418 * Move valid data to beginning of buffer 1419 */ 1420 tbp->len -= off; 1421 memmove(tbp->buf, tbp->buf + off, tbp->len); 1422 prev = tbp; 1423 } 1424 } else 1425 prev = tbp; 1426 } 1427 hd->prev = prev; 1428 1429 return entries_handled; 1430 } 1431 1432 static void __process_trace_bufs(void) 1433 { 1434 int cpu; 1435 struct list_head *p; 1436 struct list_head list; 1437 int handled = 0; 1438 1439 __list_for_each(p, &devpaths) { 1440 struct devpath *dpp = list_entry(p, struct devpath, head); 1441 struct tracer_devpath_head *hd = dpp->heads; 1442 1443 for (cpu = 0; cpu < max_cpus; cpu++, hd++) { 1444 pthread_mutex_lock(&hd->mutex); 1445 if (list_empty(&hd->head)) { 1446 pthread_mutex_unlock(&hd->mutex); 1447 continue; 1448 } 1449 1450 list_replace_init(&hd->head, &list); 1451 pthread_mutex_unlock(&hd->mutex); 1452 1453 handled += handle_list(hd, &list); 1454 } 1455 } 1456 1457 if (handled) 1458 decr_entries(handled); 1459 } 1460 1461 static void process_trace_bufs(void) 1462 { 1463 while (wait_empty_entries()) 1464 __process_trace_bufs(); 1465 } 1466 1467 static void clean_trace_bufs(void) 1468 { 1469 /* 1470 * No mutex needed here: we're only reading from the lists, 1471 * tracers are done 1472 */ 1473 while (dp_entries) 1474 __process_trace_bufs(); 1475 } 1476 1477 static inline void read_err(int cpu, char *ifn) 1478 { 1479 if (errno != EAGAIN) 1480 fprintf(stderr, "Thread %d failed read of %s: %d/%s\n", 1481 cpu, ifn, errno, strerror(errno)); 1482 } 1483 1484 static int net_sendfile(struct io_info *iop) 1485 { 1486 int ret; 1487 1488 ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready); 1489 if (ret < 0) { 1490 perror("sendfile"); 1491 return 1; 1492 } else if (ret < (int)iop->ready) { 1493 fprintf(stderr, "short sendfile send (%d of %d)\n", 1494 ret, iop->ready); 1495 return 1; 1496 } 1497 1498 return 0; 1499 } 1500 1501 static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop) 1502 { 1503 struct devpath *dpp = iop->dpp; 1504 1505 if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready)) 1506 return 1; 1507 return net_sendfile(iop); 1508 } 1509 1510 static int fill_ofname(char *dst, int dstlen, char *subdir, char *buts_name, 1511 int cpu) 1512 { 1513 int len; 1514 struct stat sb; 1515 1516 if (output_dir) 1517 len = snprintf(dst, dstlen, "%s/", output_dir); 1518 else 1519 len = snprintf(dst, dstlen, "./"); 1520 1521 if (subdir) 1522 len += snprintf(dst + len, dstlen - len, "%s", subdir); 1523 1524 if (stat(dst, &sb) < 0) { 1525 if (errno != ENOENT) { 1526 fprintf(stderr, 1527 "Destination dir %s stat failed: %d/%s\n", 1528 dst, errno, strerror(errno)); 1529 return 1; 1530 } 1531 /* 1532 * There is no synchronization between multiple threads 1533 * trying to create the directory at once. It's harmless 1534 * to let them try, so just detect the problem and move on. 1535 */ 1536 if (mkdir(dst, 0755) < 0 && errno != EEXIST) { 1537 fprintf(stderr, 1538 "Destination dir %s can't be made: %d/%s\n", 1539 dst, errno, strerror(errno)); 1540 return 1; 1541 } 1542 } 1543 1544 if (output_name) 1545 snprintf(dst + len, dstlen - len, "%s.blktrace.%d", 1546 output_name, cpu); 1547 else 1548 snprintf(dst + len, dstlen - len, "%s.blktrace.%d", 1549 buts_name, cpu); 1550 1551 return 0; 1552 } 1553 1554 static int set_vbuf(struct io_info *iop, int mode, size_t size) 1555 { 1556 iop->obuf = malloc(size); 1557 if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) { 1558 fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n", 1559 iop->dpp->path, (int)size, errno, 1560 strerror(errno)); 1561 free(iop->obuf); 1562 return 1; 1563 } 1564 1565 return 0; 1566 } 1567 1568 static int iop_open(struct io_info *iop, int cpu) 1569 { 1570 char hostdir[MAXPATHLEN + 64]; 1571 1572 iop->ofd = -1; 1573 if (net_mode == Net_server) { 1574 struct cl_conn *nc = iop->nc; 1575 int len; 1576 1577 len = snprintf(hostdir, sizeof(hostdir), "%s-", 1578 nc->ch->hostname); 1579 len += strftime(hostdir + len, sizeof(hostdir) - len, "%F-%T/", 1580 gmtime(&iop->dpp->cl_connect_time)); 1581 } else { 1582 hostdir[0] = 0; 1583 } 1584 1585 if (fill_ofname(iop->ofn, sizeof(iop->ofn), hostdir, 1586 iop->dpp->buts_name, cpu)) 1587 return 1; 1588 1589 iop->ofp = my_fopen(iop->ofn, "w+"); 1590 if (iop->ofp == NULL) { 1591 fprintf(stderr, "Open output file %s failed: %d/%s\n", 1592 iop->ofn, errno, strerror(errno)); 1593 return 1; 1594 } 1595 1596 if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) { 1597 fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n", 1598 iop->ofn, errno, strerror(errno)); 1599 fclose(iop->ofp); 1600 return 1; 1601 } 1602 1603 iop->ofd = fileno(iop->ofp); 1604 return 0; 1605 } 1606 1607 static void close_iop(struct io_info *iop) 1608 { 1609 struct mmap_info *mip = &iop->mmap_info; 1610 1611 if (mip->fs_buf) 1612 munmap(mip->fs_buf, mip->fs_buf_len); 1613 1614 if (!piped_output) { 1615 if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) { 1616 fprintf(stderr, 1617 "Ignoring err: ftruncate(%s): %d/%s\n", 1618 iop->ofn, errno, strerror(errno)); 1619 } 1620 } 1621 1622 if (iop->ofp) 1623 fclose(iop->ofp); 1624 if (iop->obuf) 1625 free(iop->obuf); 1626 } 1627 1628 static void close_ios(struct tracer *tp) 1629 { 1630 while (tp->nios > 0) { 1631 struct io_info *iop = &tp->ios[--tp->nios]; 1632 1633 iop->dpp->drops = get_drops(iop->dpp); 1634 if (iop->ifd >= 0) 1635 close(iop->ifd); 1636 1637 if (iop->ofp) 1638 close_iop(iop); 1639 else if (iop->ofd >= 0) { 1640 struct devpath *dpp = iop->dpp; 1641 1642 net_send_close(iop->ofd, dpp->buts_name, dpp->drops); 1643 net_close_connection(&iop->ofd); 1644 } 1645 } 1646 1647 free(tp->ios); 1648 free(tp->pfds); 1649 } 1650 1651 static int open_ios(struct tracer *tp) 1652 { 1653 struct pollfd *pfd; 1654 struct io_info *iop; 1655 struct list_head *p; 1656 1657 tp->ios = calloc(ndevs, sizeof(struct io_info)); 1658 memset(tp->ios, 0, ndevs * sizeof(struct io_info)); 1659 1660 tp->pfds = calloc(ndevs, sizeof(struct pollfd)); 1661 memset(tp->pfds, 0, ndevs * sizeof(struct pollfd)); 1662 1663 tp->nios = 0; 1664 iop = tp->ios; 1665 pfd = tp->pfds; 1666 __list_for_each(p, &devpaths) { 1667 struct devpath *dpp = list_entry(p, struct devpath, head); 1668 1669 iop->dpp = dpp; 1670 iop->ofd = -1; 1671 snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d", 1672 debugfs_path, dpp->buts_name, tp->cpu); 1673 1674 iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK); 1675 if (iop->ifd < 0) { 1676 fprintf(stderr, "Thread %d failed open %s: %d/%s\n", 1677 tp->cpu, iop->ifn, errno, strerror(errno)); 1678 return 1; 1679 } 1680 1681 init_mmap_info(&iop->mmap_info); 1682 1683 pfd->fd = iop->ifd; 1684 pfd->events = POLLIN; 1685 1686 if (piped_output) 1687 ; 1688 else if (net_client_use_sendfile()) { 1689 iop->ofd = net_setup_client(); 1690 if (iop->ofd < 0) 1691 goto err; 1692 net_send_open(iop->ofd, tp->cpu, dpp->buts_name); 1693 } else if (net_mode == Net_none) { 1694 if (iop_open(iop, tp->cpu)) 1695 goto err; 1696 } else { 1697 /* 1698 * This ensures that the server knows about all 1699 * connections & devices before _any_ closes 1700 */ 1701 net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name); 1702 } 1703 1704 pfd++; 1705 iop++; 1706 tp->nios++; 1707 } 1708 1709 return 0; 1710 1711 err: 1712 close(iop->ifd); /* tp->nios _not_ bumped */ 1713 close_ios(tp); 1714 return 1; 1715 } 1716 1717 static int handle_pfds_file(struct tracer *tp, int nevs, int force_read) 1718 { 1719 struct mmap_info *mip; 1720 int i, ret, nentries = 0; 1721 struct pollfd *pfd = tp->pfds; 1722 struct io_info *iop = tp->ios; 1723 1724 for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) { 1725 if (pfd->revents & POLLIN || force_read) { 1726 mip = &iop->mmap_info; 1727 1728 ret = setup_mmap(iop->ofd, buf_size, mip, tp); 1729 if (ret < 0) { 1730 pfd->events = 0; 1731 break; 1732 } 1733 1734 ret = read(iop->ifd, mip->fs_buf + mip->fs_off, 1735 buf_size); 1736 if (ret > 0) { 1737 pdc_dr_update(iop->dpp, tp->cpu, ret); 1738 mip->fs_size += ret; 1739 mip->fs_off += ret; 1740 nentries++; 1741 } else if (ret == 0) { 1742 /* 1743 * Short reads after we're done stop us 1744 * from trying reads. 1745 */ 1746 if (tp->is_done) 1747 clear_events(pfd); 1748 } else { 1749 read_err(tp->cpu, iop->ifn); 1750 if (errno != EAGAIN || tp->is_done) 1751 clear_events(pfd); 1752 } 1753 nevs--; 1754 } 1755 } 1756 1757 return nentries; 1758 } 1759 1760 static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read) 1761 { 1762 struct stat sb; 1763 int i, nentries = 0; 1764 struct pollfd *pfd = tp->pfds; 1765 struct io_info *iop = tp->ios; 1766 1767 for (i = 0; i < ndevs; i++, pfd++, iop++) { 1768 if (pfd->revents & POLLIN || force_read) { 1769 if (fstat(iop->ifd, &sb) < 0) { 1770 perror(iop->ifn); 1771 pfd->events = 0; 1772 } else if (sb.st_size > (off_t)iop->data_queued) { 1773 iop->ready = sb.st_size - iop->data_queued; 1774 iop->data_queued = sb.st_size; 1775 1776 if (!net_sendfile_data(tp, iop)) { 1777 pdc_dr_update(iop->dpp, tp->cpu, 1778 iop->ready); 1779 nentries++; 1780 } else 1781 clear_events(pfd); 1782 } 1783 if (--nevs == 0) 1784 break; 1785 } 1786 } 1787 1788 if (nentries) 1789 incr_entries(nentries); 1790 1791 return nentries; 1792 } 1793 1794 static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read) 1795 { 1796 int i, nentries = 0; 1797 struct trace_buf *tbp; 1798 struct pollfd *pfd = tp->pfds; 1799 struct io_info *iop = tp->ios; 1800 1801 tbp = alloc_trace_buf(tp->cpu, buf_size); 1802 for (i = 0; i < ndevs; i++, pfd++, iop++) { 1803 if (pfd->revents & POLLIN || force_read) { 1804 tbp->len = read(iop->ifd, tbp->buf, buf_size); 1805 if (tbp->len > 0) { 1806 pdc_dr_update(iop->dpp, tp->cpu, tbp->len); 1807 add_trace_buf(iop->dpp, tp->cpu, &tbp); 1808 nentries++; 1809 } else if (tbp->len == 0) { 1810 /* 1811 * Short reads after we're done stop us 1812 * from trying reads. 1813 */ 1814 if (tp->is_done) 1815 clear_events(pfd); 1816 } else { 1817 read_err(tp->cpu, iop->ifn); 1818 if (errno != EAGAIN || tp->is_done) 1819 clear_events(pfd); 1820 } 1821 if (!piped_output && --nevs == 0) 1822 break; 1823 } 1824 } 1825 free(tbp); 1826 1827 if (nentries) 1828 incr_entries(nentries); 1829 1830 return nentries; 1831 } 1832 1833 static void *thread_main(void *arg) 1834 { 1835 int ret, ndone, to_val; 1836 struct tracer *tp = arg; 1837 1838 ret = lock_on_cpu(tp->cpu); 1839 if (ret) 1840 goto err; 1841 1842 ret = open_ios(tp); 1843 if (ret) 1844 goto err; 1845 1846 if (piped_output) 1847 to_val = 50; /* Frequent partial handles */ 1848 else 1849 to_val = 500; /* 1/2 second intervals */ 1850 1851 1852 tracer_signal_ready(tp, Th_running, 0); 1853 tracer_wait_unblock(tp); 1854 1855 while (!tp->is_done) { 1856 ndone = poll(tp->pfds, ndevs, to_val); 1857 if (ndone || piped_output) 1858 (void)handle_pfds(tp, ndone, piped_output); 1859 else if (ndone < 0 && errno != EINTR) 1860 fprintf(stderr, "Thread %d poll failed: %d/%s\n", 1861 tp->cpu, errno, strerror(errno)); 1862 } 1863 1864 /* 1865 * Trace is stopped, pull data until we get a short read 1866 */ 1867 while (handle_pfds(tp, ndevs, 1) > 0) 1868 ; 1869 1870 close_ios(tp); 1871 tracer_signal_ready(tp, Th_leaving, 0); 1872 return NULL; 1873 1874 err: 1875 tracer_signal_ready(tp, Th_error, ret); 1876 return NULL; 1877 } 1878 1879 static int start_tracer(int cpu) 1880 { 1881 struct tracer *tp; 1882 1883 tp = malloc(sizeof(*tp)); 1884 memset(tp, 0, sizeof(*tp)); 1885 1886 INIT_LIST_HEAD(&tp->head); 1887 tp->status = 0; 1888 tp->cpu = cpu; 1889 1890 if (pthread_create(&tp->thread, NULL, thread_main, tp)) { 1891 fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n", 1892 cpu, errno, strerror(errno)); 1893 free(tp); 1894 return 1; 1895 } 1896 1897 list_add_tail(&tp->head, &tracers); 1898 return 0; 1899 } 1900 1901 static int create_output_files(int cpu) 1902 { 1903 char fname[MAXPATHLEN + 64]; 1904 struct list_head *p; 1905 FILE *f; 1906 1907 __list_for_each(p, &devpaths) { 1908 struct devpath *dpp = list_entry(p, struct devpath, head); 1909 1910 if (fill_ofname(fname, sizeof(fname), NULL, dpp->buts_name, 1911 cpu)) 1912 return 1; 1913 f = my_fopen(fname, "w+"); 1914 if (!f) 1915 return 1; 1916 fclose(f); 1917 } 1918 return 0; 1919 } 1920 1921 static void start_tracers(void) 1922 { 1923 int cpu, started = 0; 1924 struct list_head *p; 1925 size_t alloc_size = CPU_ALLOC_SIZE(max_cpus); 1926 1927 for (cpu = 0; cpu < max_cpus; cpu++) { 1928 if (!CPU_ISSET_S(cpu, alloc_size, online_cpus)) { 1929 /* 1930 * Create fake empty output files so that other tools 1931 * like blkparse don't have to bother with sparse CPU 1932 * number space. 1933 */ 1934 if (create_output_files(cpu)) 1935 break; 1936 continue; 1937 } 1938 if (start_tracer(cpu)) 1939 break; 1940 started++; 1941 } 1942 1943 wait_tracers_ready(started); 1944 1945 __list_for_each(p, &tracers) { 1946 struct tracer *tp = list_entry(p, struct tracer, head); 1947 if (tp->status) 1948 fprintf(stderr, 1949 "FAILED to start thread on CPU %d: %d/%s\n", 1950 tp->cpu, tp->status, strerror(tp->status)); 1951 } 1952 } 1953 1954 static void stop_tracers(void) 1955 { 1956 struct list_head *p; 1957 1958 /* 1959 * Stop the tracing - makes the tracer threads clean up quicker. 1960 */ 1961 __list_for_each(p, &devpaths) { 1962 struct devpath *dpp = list_entry(p, struct devpath, head); 1963 (void)ioctl(dpp->fd, BLKTRACESTOP); 1964 } 1965 1966 /* 1967 * Tell each tracer to quit 1968 */ 1969 __list_for_each(p, &tracers) { 1970 struct tracer *tp = list_entry(p, struct tracer, head); 1971 tp->is_done = 1; 1972 } 1973 pthread_cond_broadcast(&mt_cond); 1974 } 1975 1976 static void del_tracers(void) 1977 { 1978 struct list_head *p, *q; 1979 1980 list_for_each_safe(p, q, &tracers) { 1981 struct tracer *tp = list_entry(p, struct tracer, head); 1982 1983 list_del(&tp->head); 1984 free(tp); 1985 } 1986 } 1987 1988 static void wait_tracers(void) 1989 { 1990 struct list_head *p; 1991 1992 if (use_tracer_devpaths()) 1993 process_trace_bufs(); 1994 1995 wait_tracers_leaving(); 1996 1997 __list_for_each(p, &tracers) { 1998 int ret; 1999 struct tracer *tp = list_entry(p, struct tracer, head); 2000 2001 ret = pthread_join(tp->thread, NULL); 2002 if (ret) 2003 fprintf(stderr, "Thread join %d failed %d\n", 2004 tp->cpu, ret); 2005 } 2006 2007 if (use_tracer_devpaths()) 2008 clean_trace_bufs(); 2009 2010 get_all_drops(); 2011 } 2012 2013 static void exit_tracing(void) 2014 { 2015 signal(SIGINT, SIG_IGN); 2016 signal(SIGHUP, SIG_IGN); 2017 signal(SIGTERM, SIG_IGN); 2018 signal(SIGALRM, SIG_IGN); 2019 2020 stop_tracers(); 2021 wait_tracers(); 2022 del_tracers(); 2023 rel_devpaths(); 2024 } 2025 2026 static void handle_sigint(__attribute__((__unused__)) int sig) 2027 { 2028 done = 1; 2029 stop_tracers(); 2030 } 2031 2032 static void show_stats(struct list_head *devpaths) 2033 { 2034 FILE *ofp; 2035 struct list_head *p; 2036 unsigned long long nevents, data_read; 2037 unsigned long long total_drops = 0; 2038 unsigned long long total_events = 0; 2039 2040 if (piped_output) 2041 ofp = my_fopen("/dev/null", "w"); 2042 else 2043 ofp = stdout; 2044 2045 __list_for_each(p, devpaths) { 2046 int cpu; 2047 struct pdc_stats *sp; 2048 struct devpath *dpp = list_entry(p, struct devpath, head); 2049 2050 if (net_mode == Net_server) 2051 printf("server: end of run for %s:%s\n", 2052 dpp->ch->hostname, dpp->buts_name); 2053 2054 data_read = 0; 2055 nevents = 0; 2056 2057 fprintf(ofp, "=== %s ===\n", dpp->buts_name); 2058 for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) { 2059 /* 2060 * Estimate events if not known... 2061 */ 2062 if (sp->nevents == 0) { 2063 sp->nevents = sp->data_read / 2064 sizeof(struct blk_io_trace); 2065 } 2066 2067 fprintf(ofp, 2068 " CPU%3d: %20llu events, %8llu KiB data\n", 2069 cpu, sp->nevents, (sp->data_read + 1023) >> 10); 2070 2071 data_read += sp->data_read; 2072 nevents += sp->nevents; 2073 } 2074 2075 fprintf(ofp, " Total: %20llu events (dropped %llu)," 2076 " %8llu KiB data\n", nevents, 2077 dpp->drops, (data_read + 1024) >> 10); 2078 2079 total_drops += dpp->drops; 2080 total_events += (nevents + dpp->drops); 2081 } 2082 2083 fflush(ofp); 2084 if (piped_output) 2085 fclose(ofp); 2086 2087 if (total_drops) { 2088 double drops_ratio = 1.0; 2089 2090 if (total_events) 2091 drops_ratio = (double)total_drops/(double)total_events; 2092 2093 fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n" 2094 "Consider using a larger buffer size (-b) " 2095 "and/or more buffers (-n)\n", 2096 total_drops, 100.0 * drops_ratio); 2097 } 2098 } 2099 2100 static int handle_args(int argc, char *argv[]) 2101 { 2102 int c, i; 2103 struct statfs st; 2104 int act_mask_tmp = 0; 2105 2106 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) { 2107 switch (c) { 2108 case 'a': 2109 i = find_mask_map(optarg); 2110 if (i < 0) { 2111 fprintf(stderr, "Invalid action mask %s\n", 2112 optarg); 2113 return 1; 2114 } 2115 act_mask_tmp |= i; 2116 break; 2117 2118 case 'A': 2119 if ((sscanf(optarg, "%x", &i) != 1) || 2120 !valid_act_opt(i)) { 2121 fprintf(stderr, 2122 "Invalid set action mask %s/0x%x\n", 2123 optarg, i); 2124 return 1; 2125 } 2126 act_mask_tmp = i; 2127 break; 2128 2129 case 'd': 2130 if (add_devpath(optarg) != 0) 2131 return 1; 2132 break; 2133 2134 case 'I': { 2135 char dev_line[256]; 2136 FILE *ifp = my_fopen(optarg, "r"); 2137 2138 if (!ifp) { 2139 fprintf(stderr, 2140 "Invalid file for devices %s\n", 2141 optarg); 2142 return 1; 2143 } 2144 2145 while (fscanf(ifp, "%s\n", dev_line) == 1) { 2146 if (add_devpath(dev_line) != 0) { 2147 fclose(ifp); 2148 return 1; 2149 } 2150 } 2151 fclose(ifp); 2152 break; 2153 } 2154 2155 case 'r': 2156 debugfs_path = optarg; 2157 break; 2158 2159 case 'o': 2160 output_name = optarg; 2161 break; 2162 case 'k': 2163 kill_running_trace = 1; 2164 break; 2165 case 'w': 2166 stop_watch = atoi(optarg); 2167 if (stop_watch <= 0) { 2168 fprintf(stderr, 2169 "Invalid stopwatch value (%d secs)\n", 2170 stop_watch); 2171 return 1; 2172 } 2173 break; 2174 case 'V': 2175 case 'v': 2176 printf("%s version %s\n", argv[0], blktrace_version); 2177 exit(0); 2178 /*NOTREACHED*/ 2179 case 'b': 2180 buf_size = strtoul(optarg, NULL, 10); 2181 if (buf_size <= 0 || buf_size > 16*1024) { 2182 fprintf(stderr, "Invalid buffer size (%lu)\n", 2183 buf_size); 2184 return 1; 2185 } 2186 buf_size <<= 10; 2187 break; 2188 case 'n': 2189 buf_nr = strtoul(optarg, NULL, 10); 2190 if (buf_nr <= 0) { 2191 fprintf(stderr, 2192 "Invalid buffer nr (%lu)\n", buf_nr); 2193 return 1; 2194 } 2195 break; 2196 case 'D': 2197 output_dir = optarg; 2198 break; 2199 case 'h': 2200 net_mode = Net_client; 2201 memset(hostname, 0, sizeof(hostname)); 2202 strncpy(hostname, optarg, sizeof(hostname)); 2203 hostname[sizeof(hostname) - 1] = '\0'; 2204 break; 2205 case 'l': 2206 net_mode = Net_server; 2207 break; 2208 case 'p': 2209 net_port = atoi(optarg); 2210 break; 2211 case 's': 2212 net_use_sendfile = 0; 2213 break; 2214 default: 2215 show_usage(argv[0]); 2216 exit(1); 2217 /*NOTREACHED*/ 2218 } 2219 } 2220 2221 while (optind < argc) 2222 if (add_devpath(argv[optind++]) != 0) 2223 return 1; 2224 2225 if (net_mode != Net_server && ndevs == 0) { 2226 show_usage(argv[0]); 2227 return 1; 2228 } 2229 2230 if (statfs(debugfs_path, &st) < 0) { 2231 fprintf(stderr, "Invalid debug path %s: %d/%s\n", 2232 debugfs_path, errno, strerror(errno)); 2233 return 1; 2234 } 2235 2236 if (st.f_type != (long)DEBUGFS_TYPE) { 2237 fprintf(stderr, "Debugfs is not mounted at %s\n", debugfs_path); 2238 return 1; 2239 } 2240 2241 if (act_mask_tmp != 0) 2242 act_mask = act_mask_tmp; 2243 2244 if (net_mode == Net_client && net_setup_addr()) 2245 return 1; 2246 2247 /* 2248 * Set up for appropriate PFD handler based upon output name. 2249 */ 2250 if (net_client_use_sendfile()) 2251 handle_pfds = handle_pfds_netclient; 2252 else if (net_client_use_send()) 2253 handle_pfds = handle_pfds_entries; 2254 else if (output_name && (strcmp(output_name, "-") == 0)) { 2255 piped_output = 1; 2256 handle_pfds = handle_pfds_entries; 2257 pfp = stdout; 2258 if (setvbuf(pfp, NULL, _IONBF, 0)) { 2259 perror("setvbuf stdout"); 2260 return 1; 2261 } 2262 } else 2263 handle_pfds = handle_pfds_file; 2264 return 0; 2265 } 2266 2267 static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch, 2268 int fd) 2269 { 2270 struct cl_conn *nc; 2271 2272 nc = malloc(sizeof(*nc)); 2273 memset(nc, 0, sizeof(*nc)); 2274 2275 time(&nc->connect_time); 2276 nc->ch = ch; 2277 nc->fd = fd; 2278 nc->ncpus = -1; 2279 2280 list_add_tail(&nc->ch_head, &ch->conn_list); 2281 ch->connects++; 2282 2283 list_add_tail(&nc->ns_head, &ns->conn_list); 2284 ns->connects++; 2285 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd)); 2286 } 2287 2288 static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch, 2289 struct cl_conn *nc) 2290 { 2291 net_close_connection(&nc->fd); 2292 2293 list_del(&nc->ch_head); 2294 ch->connects--; 2295 2296 list_del(&nc->ns_head); 2297 ns->connects--; 2298 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd)); 2299 2300 free(nc); 2301 } 2302 2303 static struct cl_host *net_find_client_host(struct net_server_s *ns, 2304 struct in_addr cl_in_addr) 2305 { 2306 struct list_head *p; 2307 2308 __list_for_each(p, &ns->ch_list) { 2309 struct cl_host *ch = list_entry(p, struct cl_host, head); 2310 2311 if (in_addr_eq(ch->cl_in_addr, cl_in_addr)) 2312 return ch; 2313 } 2314 2315 return NULL; 2316 } 2317 2318 static struct cl_host *net_add_client_host(struct net_server_s *ns, 2319 struct sockaddr_in *addr) 2320 { 2321 struct cl_host *ch; 2322 2323 ch = malloc(sizeof(*ch)); 2324 memset(ch, 0, sizeof(*ch)); 2325 2326 ch->ns = ns; 2327 ch->cl_in_addr = addr->sin_addr; 2328 list_add_tail(&ch->head, &ns->ch_list); 2329 ns->nchs++; 2330 2331 ch->hostname = strdup(inet_ntoa(addr->sin_addr)); 2332 printf("server: connection from %s\n", ch->hostname); 2333 2334 INIT_LIST_HEAD(&ch->conn_list); 2335 INIT_LIST_HEAD(&ch->devpaths); 2336 2337 return ch; 2338 } 2339 2340 static void device_done(struct devpath *dpp, int ncpus) 2341 { 2342 int cpu; 2343 struct io_info *iop; 2344 2345 for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++) 2346 close_iop(iop); 2347 2348 list_del(&dpp->head); 2349 dpp_free(dpp); 2350 } 2351 2352 static void net_ch_remove(struct cl_host *ch, int ncpus) 2353 { 2354 struct list_head *p, *q; 2355 struct net_server_s *ns = ch->ns; 2356 2357 list_for_each_safe(p, q, &ch->devpaths) { 2358 struct devpath *dpp = list_entry(p, struct devpath, head); 2359 device_done(dpp, ncpus); 2360 } 2361 2362 list_for_each_safe(p, q, &ch->conn_list) { 2363 struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head); 2364 2365 ch_rem_connection(ns, ch, nc); 2366 } 2367 2368 list_del(&ch->head); 2369 ns->nchs--; 2370 2371 if (ch->hostname) 2372 free(ch->hostname); 2373 free(ch); 2374 } 2375 2376 static void net_add_connection(struct net_server_s *ns) 2377 { 2378 int fd; 2379 struct cl_host *ch; 2380 socklen_t socklen = sizeof(ns->addr); 2381 2382 fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen); 2383 if (fd < 0) { 2384 /* 2385 * This is OK: we just won't accept this connection, 2386 * nothing fatal. 2387 */ 2388 perror("accept"); 2389 } else { 2390 ch = net_find_client_host(ns, ns->addr.sin_addr); 2391 if (!ch) 2392 ch = net_add_client_host(ns, &ns->addr); 2393 2394 ch_add_connection(ns, ch, fd); 2395 } 2396 } 2397 2398 static struct devpath *nc_add_dpp(struct cl_conn *nc, 2399 struct blktrace_net_hdr *bnh, 2400 time_t connect_time) 2401 { 2402 int cpu; 2403 struct io_info *iop; 2404 struct devpath *dpp; 2405 2406 dpp = malloc(sizeof(*dpp)); 2407 memset(dpp, 0, sizeof(*dpp)); 2408 2409 dpp->buts_name = strdup(bnh->buts_name); 2410 dpp->path = strdup(bnh->buts_name); 2411 dpp->fd = -1; 2412 dpp->ch = nc->ch; 2413 dpp->cl_id = bnh->cl_id; 2414 dpp->cl_connect_time = connect_time; 2415 dpp->ncpus = nc->ncpus; 2416 dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats)); 2417 memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats)); 2418 2419 list_add_tail(&dpp->head, &nc->ch->devpaths); 2420 nc->ch->ndevs++; 2421 2422 dpp->ios = calloc(nc->ncpus, sizeof(*iop)); 2423 memset(dpp->ios, 0, ndevs * sizeof(*iop)); 2424 2425 for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) { 2426 iop->dpp = dpp; 2427 iop->nc = nc; 2428 init_mmap_info(&iop->mmap_info); 2429 2430 if (iop_open(iop, cpu)) 2431 goto err; 2432 } 2433 2434 return dpp; 2435 2436 err: 2437 /* 2438 * Need to unravel what's been done... 2439 */ 2440 while (cpu >= 0) 2441 close_iop(&dpp->ios[cpu--]); 2442 dpp_free(dpp); 2443 2444 return NULL; 2445 } 2446 2447 static struct devpath *nc_find_dpp(struct cl_conn *nc, 2448 struct blktrace_net_hdr *bnh) 2449 { 2450 struct list_head *p; 2451 time_t connect_time = nc->connect_time; 2452 2453 __list_for_each(p, &nc->ch->devpaths) { 2454 struct devpath *dpp = list_entry(p, struct devpath, head); 2455 2456 if (!strcmp(dpp->buts_name, bnh->buts_name)) 2457 return dpp; 2458 2459 if (dpp->cl_id == bnh->cl_id) 2460 connect_time = dpp->cl_connect_time; 2461 } 2462 2463 return nc_add_dpp(nc, bnh, connect_time); 2464 } 2465 2466 static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp, 2467 struct blktrace_net_hdr *bnh) 2468 { 2469 int ret; 2470 struct io_info *iop = &dpp->ios[bnh->cpu]; 2471 struct mmap_info *mip = &iop->mmap_info; 2472 2473 if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info, NULL)) { 2474 fprintf(stderr, "ncd(%s:%d): mmap failed\n", 2475 nc->ch->hostname, nc->fd); 2476 exit(1); 2477 } 2478 2479 ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len); 2480 if (ret > 0) { 2481 pdc_dr_update(dpp, bnh->cpu, ret); 2482 mip->fs_size += ret; 2483 mip->fs_off += ret; 2484 } else if (ret < 0) 2485 exit(1); 2486 } 2487 2488 /* 2489 * Returns 1 if we closed a host - invalidates other polling information 2490 * that may be present. 2491 */ 2492 static int net_client_data(struct cl_conn *nc) 2493 { 2494 int ret; 2495 struct devpath *dpp; 2496 struct blktrace_net_hdr bnh; 2497 2498 ret = net_get_header(nc, &bnh); 2499 if (ret == 0) 2500 return 0; 2501 2502 if (ret < 0) { 2503 fprintf(stderr, "ncd(%d): header read failed\n", nc->fd); 2504 exit(1); 2505 } 2506 2507 if (data_is_native == -1 && check_data_endianness(bnh.magic)) { 2508 fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd); 2509 exit(1); 2510 } 2511 2512 if (!data_is_native) { 2513 bnh.magic = be32_to_cpu(bnh.magic); 2514 bnh.cpu = be32_to_cpu(bnh.cpu); 2515 bnh.max_cpus = be32_to_cpu(bnh.max_cpus); 2516 bnh.len = be32_to_cpu(bnh.len); 2517 bnh.cl_id = be32_to_cpu(bnh.cl_id); 2518 bnh.buf_size = be32_to_cpu(bnh.buf_size); 2519 bnh.buf_nr = be32_to_cpu(bnh.buf_nr); 2520 bnh.page_size = be32_to_cpu(bnh.page_size); 2521 } 2522 2523 if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) { 2524 fprintf(stderr, "ncd(%s:%d): bad data magic\n", 2525 nc->ch->hostname, nc->fd); 2526 exit(1); 2527 } 2528 2529 if (nc->ncpus == -1) 2530 nc->ncpus = bnh.max_cpus; 2531 2532 /* 2533 * len == 0 means the other end is sending us a new connection/dpp 2534 * len == 1 means that the other end signalled end-of-run 2535 */ 2536 dpp = nc_find_dpp(nc, &bnh); 2537 if (bnh.len == 0) { 2538 /* 2539 * Just adding in the dpp above is enough 2540 */ 2541 ack_open_close(nc->fd, dpp->buts_name); 2542 nc->ch->cl_opens++; 2543 } else if (bnh.len == 1) { 2544 /* 2545 * overload cpu count with dropped events 2546 */ 2547 dpp->drops = bnh.cpu; 2548 2549 ack_open_close(nc->fd, dpp->buts_name); 2550 if (--nc->ch->cl_opens == 0) { 2551 show_stats(&nc->ch->devpaths); 2552 net_ch_remove(nc->ch, nc->ncpus); 2553 return 1; 2554 } 2555 } else 2556 net_client_read_data(nc, dpp, &bnh); 2557 2558 return 0; 2559 } 2560 2561 static void handle_client_data(struct net_server_s *ns, int events) 2562 { 2563 struct cl_conn *nc; 2564 struct pollfd *pfd; 2565 struct list_head *p, *q; 2566 2567 pfd = &ns->pfds[1]; 2568 list_for_each_safe(p, q, &ns->conn_list) { 2569 if (pfd->revents & POLLIN) { 2570 nc = list_entry(p, struct cl_conn, ns_head); 2571 2572 if (net_client_data(nc) || --events == 0) 2573 break; 2574 } 2575 pfd++; 2576 } 2577 } 2578 2579 static void net_setup_pfds(struct net_server_s *ns) 2580 { 2581 struct pollfd *pfd; 2582 struct list_head *p; 2583 2584 ns->pfds[0].fd = ns->listen_fd; 2585 ns->pfds[0].events = POLLIN; 2586 2587 pfd = &ns->pfds[1]; 2588 __list_for_each(p, &ns->conn_list) { 2589 struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head); 2590 2591 pfd->fd = nc->fd; 2592 pfd->events = POLLIN; 2593 pfd++; 2594 } 2595 } 2596 2597 static int net_server_handle_connections(struct net_server_s *ns) 2598 { 2599 int events; 2600 2601 printf("server: waiting for connections...\n"); 2602 2603 while (!done) { 2604 net_setup_pfds(ns); 2605 events = poll(ns->pfds, ns->connects + 1, -1); 2606 if (events < 0) { 2607 if (errno != EINTR) { 2608 perror("FATAL: poll error"); 2609 return 1; 2610 } 2611 } else if (events > 0) { 2612 if (ns->pfds[0].revents & POLLIN) { 2613 net_add_connection(ns); 2614 events--; 2615 } 2616 2617 if (events) 2618 handle_client_data(ns, events); 2619 } 2620 } 2621 2622 return 0; 2623 } 2624 2625 static int net_server(void) 2626 { 2627 int fd, opt; 2628 int ret = 1; 2629 struct net_server_s net_server; 2630 struct net_server_s *ns = &net_server; 2631 2632 memset(ns, 0, sizeof(*ns)); 2633 INIT_LIST_HEAD(&ns->ch_list); 2634 INIT_LIST_HEAD(&ns->conn_list); 2635 ns->pfds = malloc(sizeof(struct pollfd)); 2636 2637 fd = my_socket(AF_INET, SOCK_STREAM, 0); 2638 if (fd < 0) { 2639 perror("server: socket"); 2640 goto out; 2641 } 2642 2643 opt = 1; 2644 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) { 2645 perror("setsockopt"); 2646 goto out; 2647 } 2648 2649 memset(&ns->addr, 0, sizeof(ns->addr)); 2650 ns->addr.sin_family = AF_INET; 2651 ns->addr.sin_addr.s_addr = htonl(INADDR_ANY); 2652 ns->addr.sin_port = htons(net_port); 2653 2654 if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) { 2655 perror("bind"); 2656 goto out; 2657 } 2658 2659 if (listen(fd, 1) < 0) { 2660 perror("listen"); 2661 goto out; 2662 } 2663 2664 /* 2665 * The actual server looping is done here: 2666 */ 2667 ns->listen_fd = fd; 2668 ret = net_server_handle_connections(ns); 2669 2670 /* 2671 * Clean up and return... 2672 */ 2673 out: 2674 free(ns->pfds); 2675 return ret; 2676 } 2677 2678 static int run_tracers(void) 2679 { 2680 atexit(exit_tracing); 2681 if (net_mode == Net_client) 2682 printf("blktrace: connecting to %s\n", hostname); 2683 2684 if (setup_buts()) 2685 return 1; 2686 2687 if (use_tracer_devpaths()) { 2688 if (setup_tracer_devpaths()) 2689 return 1; 2690 2691 if (piped_output) 2692 handle_list = handle_list_file; 2693 else 2694 handle_list = handle_list_net; 2695 } 2696 2697 start_tracers(); 2698 if (nthreads_running == ncpus) { 2699 unblock_tracers(); 2700 start_buts(); 2701 if (net_mode == Net_client) 2702 printf("blktrace: connected!\n"); 2703 if (stop_watch) 2704 alarm(stop_watch); 2705 } else 2706 stop_tracers(); 2707 2708 wait_tracers(); 2709 if (nthreads_running == ncpus) 2710 show_stats(&devpaths); 2711 if (net_client_use_send()) 2712 close_client_connections(); 2713 del_tracers(); 2714 2715 return 0; 2716 } 2717 2718 static cpu_set_t *get_online_cpus(void) 2719 { 2720 FILE *cpus; 2721 cpu_set_t *set; 2722 size_t alloc_size; 2723 int cpuid, prevcpuid = -1; 2724 char nextch; 2725 int n, ncpu, curcpu = 0; 2726 int *cpu_nums; 2727 2728 ncpu = sysconf(_SC_NPROCESSORS_CONF); 2729 if (ncpu < 0) 2730 return NULL; 2731 2732 cpu_nums = malloc(sizeof(int)*ncpu); 2733 if (!cpu_nums) { 2734 errno = ENOMEM; 2735 return NULL; 2736 } 2737 2738 /* 2739 * There is no way to easily get maximum CPU number. So we have to 2740 * parse the file first to find it out and then create appropriate 2741 * cpuset 2742 */ 2743 cpus = my_fopen("/sys/devices/system/cpu/online", "r"); 2744 for (;;) { 2745 n = fscanf(cpus, "%d%c", &cpuid, &nextch); 2746 if (n <= 0) 2747 break; 2748 if (n == 2 && nextch == '-') { 2749 prevcpuid = cpuid; 2750 continue; 2751 } 2752 if (prevcpuid == -1) 2753 prevcpuid = cpuid; 2754 while (prevcpuid <= cpuid) { 2755 /* More CPUs listed than configured? */ 2756 if (curcpu >= ncpu) { 2757 errno = EINVAL; 2758 return NULL; 2759 } 2760 cpu_nums[curcpu++] = prevcpuid++; 2761 } 2762 prevcpuid = -1; 2763 } 2764 fclose(cpus); 2765 2766 ncpu = curcpu; 2767 max_cpus = cpu_nums[ncpu - 1] + 1; 2768 2769 /* Now that we have maximum cpu number, create a cpuset */ 2770 set = CPU_ALLOC(max_cpus); 2771 if (!set) { 2772 errno = ENOMEM; 2773 return NULL; 2774 } 2775 alloc_size = CPU_ALLOC_SIZE(max_cpus); 2776 CPU_ZERO_S(alloc_size, set); 2777 2778 for (curcpu = 0; curcpu < ncpu; curcpu++) 2779 CPU_SET_S(cpu_nums[curcpu], alloc_size, set); 2780 2781 free(cpu_nums); 2782 2783 return set; 2784 } 2785 2786 int main(int argc, char *argv[]) 2787 { 2788 int ret = 0; 2789 2790 setlocale(LC_NUMERIC, "en_US"); 2791 pagesize = getpagesize(); 2792 online_cpus = get_online_cpus(); 2793 if (!online_cpus) { 2794 fprintf(stderr, "cannot get online cpus %d/%s\n", 2795 errno, strerror(errno)); 2796 ret = 1; 2797 goto out; 2798 } else if (handle_args(argc, argv)) { 2799 ret = 1; 2800 goto out; 2801 } 2802 2803 ncpus = CPU_COUNT_S(CPU_ALLOC_SIZE(max_cpus), online_cpus); 2804 if (ndevs > 1 && output_name && strcmp(output_name, "-") != 0) { 2805 fprintf(stderr, "-o not supported with multiple devices\n"); 2806 ret = 1; 2807 goto out; 2808 } 2809 2810 signal(SIGINT, handle_sigint); 2811 signal(SIGHUP, handle_sigint); 2812 signal(SIGTERM, handle_sigint); 2813 signal(SIGALRM, handle_sigint); 2814 signal(SIGPIPE, SIG_IGN); 2815 2816 if (kill_running_trace) { 2817 struct devpath *dpp; 2818 struct list_head *p; 2819 2820 __list_for_each(p, &devpaths) { 2821 dpp = list_entry(p, struct devpath, head); 2822 if (__stop_trace(dpp->fd)) { 2823 fprintf(stderr, 2824 "BLKTRACETEARDOWN %s failed: %d/%s\n", 2825 dpp->path, errno, strerror(errno)); 2826 } 2827 } 2828 } else if (net_mode == Net_server) { 2829 if (output_name) { 2830 fprintf(stderr, "-o ignored in server mode\n"); 2831 output_name = NULL; 2832 } 2833 ret = net_server(); 2834 } else 2835 ret = run_tracers(); 2836 2837 out: 2838 if (pfp) 2839 fclose(pfp); 2840 rel_devpaths(); 2841 return ret; 2842 } 2843