Home | History | Annotate | Download | only in cgroup
      1 /* getdelays.c
      2  *
      3  * Utility to get per-pid and per-tgid delay accounting statistics
      4  * Also illustrates usage of the taskstats interface
      5  *
      6  * Copyright (C) Shailabh Nagar, IBM Corp. 2005
      7  * Copyright (C) Balbir Singh, IBM Corp. 2006
      8  * Copyright (c) Jay Lan, SGI. 2006
      9  *
     10  * Compile with
     11  *	gcc -I/usr/src/linux/include getdelays.c -o getdelays
     12  */
     13 
     14 #include <stdio.h>
     15 #include <stdlib.h>
     16 #include <errno.h>
     17 #include <unistd.h>
     18 #include <poll.h>
     19 #include <string.h>
     20 #include <fcntl.h>
     21 #include <sys/types.h>
     22 #include <sys/stat.h>
     23 #include <sys/socket.h>
     24 #include <signal.h>
     25 #include <linux/types.h>
     26 #include "config.h"
     27 
     28 #ifdef HAVE_LINUX_GENETLINK_H
     29 #include <linux/genetlink.h>
     30 #endif
     31 #ifdef HAVE_LINUX_TASKSTATS_H
     32 #include <linux/taskstats.h>
     33 #endif
     34 #ifdef HAVE_LINUX_CGROUPSTATS_H
     35 #include <linux/cgroupstats.h>
     36 #endif
     37 
     38 #if defined(HAVE_LINUX_GENETLINK_H) && defined(HAVE_LINUX_TASKSTATS_H)
     39 
     40 /*
     41  * Generic macros for dealing with netlink sockets. Might be duplicated
     42  * elsewhere. It is recommended that commercial grade applications use
     43  * libnl or libnetlink and use the interfaces provided by the library
     44  */
     45 #define GENLMSG_DATA(glh)	((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
     46 #define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
     47 #define NLA_DATA(na)		((void *)((char*)(na) + NLA_HDRLEN))
     48 #define NLA_PAYLOAD(len)	(len - NLA_HDRLEN)
     49 
     50 #define err(code, fmt, arg...)			\
     51 	do {					\
     52 		fprintf(stderr, fmt, ##arg);	\
     53 		exit(code);			\
     54 	} while (0)
     55 
     56 int done;
     57 int rcvbufsz;
     58 char name[100];
     59 int dbg;
     60 int print_delays;
     61 int print_io_accounting;
     62 int print_task_context_switch_counts;
     63 
     64 #define PRINTF(fmt, arg...) {			\
     65 	    if (dbg) {				\
     66 		printf(fmt, ##arg);		\
     67 	    }					\
     68 	}
     69 
/* Maximum size of response requested or message sent */
#define MAX_MSG_SIZE	1024
/* Maximum number of cpus expected to be specified in a cpumask */
#define MAX_CPUS	32

/*
 * cpumask string given with -m; it is sent verbatim to the kernel when
 * registering and deregistering as a listener.
 */
char cpumask[100 + 6 * MAX_CPUS];
     76 
     77 static void usage(void)
     78 {
     79 	fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] "
     80 		"[-m cpumask] [-t tgid] [-p pid]\n");
     81 	fprintf(stderr, "  -d: print delayacct stats\n");
     82 	fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
     83 	fprintf(stderr, "  -l: listen forever\n");
     84 	fprintf(stderr, "  -v: debug on\n");
     85 	fprintf(stderr, "  -C: container path\n");
     86 }
     87 
/*
 * Layout of a generic netlink message as sent by send_cmd() and received
 * in main(): netlink header, generic netlink header, then up to
 * MAX_MSG_SIZE bytes of attribute data.
 */
struct msgtemplate {
	struct nlmsghdr n;	/* netlink message header */
	struct genlmsghdr g;	/* generic netlink header (cmd, version) */
	char buf[MAX_MSG_SIZE];	/* netlink attributes */
};
     93 
     94 /*
     95  * Create a raw netlink socket and bind
     96  */
     97 static int create_nl_socket(int protocol)
     98 {
     99 	int fd;
    100 	struct sockaddr_nl local;
    101 
    102 	fd = socket(AF_NETLINK, SOCK_RAW, protocol);
    103 	if (fd < 0)
    104 		return -1;
    105 
    106 	if (rcvbufsz)
    107 		if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
    108 			       &rcvbufsz, sizeof(rcvbufsz)) < 0) {
    109 			fprintf(stderr, "Unable to set socket rcv buf size "
    110 				"to %d\n", rcvbufsz);
    111 			return -1;
    112 		}
    113 
    114 	memset(&local, 0, sizeof(local));
    115 	local.nl_family = AF_NETLINK;
    116 
    117 	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0)
    118 		goto error;
    119 
    120 	return fd;
    121 error:
    122 	close(fd);
    123 	return -1;
    124 }
    125 
    126 int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
    127 	     __u8 genl_cmd, __u16 nla_type, void *nla_data, int nla_len)
    128 {
    129 	struct nlattr *na;
    130 	struct sockaddr_nl nladdr;
    131 	int r, buflen;
    132 	char *buf;
    133 
    134 	struct msgtemplate msg;
    135 
    136 	msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
    137 	msg.n.nlmsg_type = nlmsg_type;
    138 	msg.n.nlmsg_flags = NLM_F_REQUEST;
    139 	msg.n.nlmsg_seq = 0;
    140 	msg.n.nlmsg_pid = nlmsg_pid;
    141 	msg.g.cmd = genl_cmd;
    142 	msg.g.version = 0x1;
    143 	na = (struct nlattr *)GENLMSG_DATA(&msg);
    144 	na->nla_type = nla_type;
    145 	na->nla_len = nla_len + 1 + NLA_HDRLEN;
    146 	memcpy(NLA_DATA(na), nla_data, nla_len);
    147 	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
    148 
    149 	buf = (char *)&msg;
    150 	buflen = msg.n.nlmsg_len;
    151 	memset(&nladdr, 0, sizeof(nladdr));
    152 	nladdr.nl_family = AF_NETLINK;
    153 	while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *)&nladdr,
    154 			   sizeof(nladdr))) < buflen) {
    155 		if (r > 0) {
    156 			buf += r;
    157 			buflen -= r;
    158 		} else if (errno != EAGAIN)
    159 			return -1;
    160 	}
    161 	return 0;
    162 }
    163 
    164 /*
    165  * Probe the controller in genetlink to find the family id
    166  * for the TASKSTATS family
    167  */
    168 int get_family_id(int sd)
    169 {
    170 	struct {
    171 		struct nlmsghdr n;
    172 		struct genlmsghdr g;
    173 		char buf[256];
    174 	} ans;
    175 
    176 	int id = 0, rc;
    177 	struct nlattr *na;
    178 	int rep_len;
    179 
    180 	strcpy(name, TASKSTATS_GENL_NAME);
    181 	rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
    182 		      CTRL_ATTR_FAMILY_NAME, (void *)name,
    183 		      strlen(TASKSTATS_GENL_NAME) + 1);
    184 
    185 	rep_len = recv(sd, &ans, sizeof(ans), 0);
    186 	if (ans.n.nlmsg_type == NLMSG_ERROR ||
    187 	    (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
    188 		return 0;
    189 
    190 	na = (struct nlattr *)GENLMSG_DATA(&ans);
    191 	na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
    192 	if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
    193 		id = *(__u16 *) NLA_DATA(na);
    194 	}
    195 	return id;
    196 }
    197 
    198 void print_delayacct(struct taskstats *t)
    199 {
    200 	printf("\n\nCPU   %15s%15s%15s%15s\n"
    201 	       "      %15llu%15llu%15llu%15llu\n"
    202 	       "IO    %15s%15s\n"
    203 	       "      %15llu%15llu\n"
    204 	       "SWAP  %15s%15s\n" "      %15llu%15llu\n" "RECLAIM  %12s%15s\n"
    205 #ifdef HAVE_STRUCT_TASKSTATS_FREEPAGES_COUNT
    206 	       "      %15llu%15llu\n"
    207 #endif
    208 	       , "count", "real total", "virtual total", "delay total",
    209 	       (unsigned long long)t->cpu_count,
    210 	       (unsigned long long)t->cpu_run_real_total,
    211 	       (unsigned long long)t->cpu_run_virtual_total,
    212 	       (unsigned long long)t->cpu_delay_total,
    213 	       "count", "delay total",
    214 	       (unsigned long long)t->blkio_count,
    215 	       (unsigned long long)t->blkio_delay_total,
    216 	       "count", "delay total",
    217 	       (unsigned long long)t->swapin_count,
    218 	       (unsigned long long)t->swapin_delay_total, "count", "delay total"
    219 #ifdef HAVE_STRUCT_TASKSTATS_FREEPAGES_COUNT
    220 	       , (unsigned long long)t->freepages_count,
    221 	       (unsigned long long)t->freepages_delay_total
    222 #endif
    223 	    );
    224 }
    225 
    226 void task_context_switch_counts(struct taskstats *t)
    227 {
    228 #ifdef HAVE_STRUCT_TASKSTATS_NVCSW
    229 	printf("\n\nTask   %15s%15s\n"
    230 	       "	%15llu%15llu\n",
    231 	       "voluntary", "nonvoluntary",
    232 	       (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
    233 #endif
    234 }
    235 
    236 #ifdef HAVE_LINUX_CGROUPSTATS_H
    237 void print_cgroupstats(struct cgroupstats *c)
    238 {
    239 	printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
    240 	       "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
    241 	       (unsigned long long)c->nr_io_wait,
    242 	       (unsigned long long)c->nr_running,
    243 	       (unsigned long long)c->nr_stopped,
    244 	       (unsigned long long)c->nr_uninterruptible);
    245 }
    246 #endif
    247 
    248 void print_ioacct(struct taskstats *t)
    249 {
    250 #ifdef HAVE_STRUCT_TASKSTATS_READ_BYTES
    251 	printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
    252 	       t->ac_comm,
    253 	       (unsigned long long)t->read_bytes,
    254 	       (unsigned long long)t->write_bytes,
    255 	       (unsigned long long)t->cancelled_write_bytes);
    256 #endif
    257 }
    258 
    259 int main(int argc, char *argv[])
    260 {
    261 	int c, rc, rep_len, aggr_len, len2, cmd_type = 0;
    262 	__u16 id;
    263 	__u32 mypid;
    264 
    265 	struct nlattr *na;
    266 	int nl_sd = -1;
    267 	int len = 0;
    268 	pid_t tid = 0;
    269 	pid_t rtid = 0;
    270 
    271 	int fd = 0;
    272 	int count = 0;
    273 	int write_file = 0;
    274 	int maskset = 0;
    275 	char *logfile = NULL;
    276 	int loop = 0;
    277 	int containerset = 0;
    278 	char containerpath[1024];
    279 	int cfd = 0;
    280 
    281 	struct msgtemplate msg;
    282 
    283 	while (1) {
    284 		c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:");
    285 		if (c < 0)
    286 			break;
    287 
    288 		switch (c) {
    289 		case 'd':
    290 			printf("print delayacct stats ON\n");
    291 			print_delays = 1;
    292 			break;
    293 		case 'i':
    294 			printf("printing IO accounting\n");
    295 			print_io_accounting = 1;
    296 			break;
    297 		case 'q':
    298 			printf("printing task/process context switch rates\n");
    299 			print_task_context_switch_counts = 1;
    300 			break;
    301 		case 'C':
    302 			containerset = 1;
    303 			strncpy(containerpath, optarg, strlen(optarg) + 1);
    304 			break;
    305 		case 'w':
    306 			logfile = strdup(optarg);
    307 			printf("write to file %s\n", logfile);
    308 			write_file = 1;
    309 			break;
    310 		case 'r':
    311 			rcvbufsz = atoi(optarg);
    312 			printf("receive buf size %d\n", rcvbufsz);
    313 			if (rcvbufsz < 0)
    314 				err(1, "Invalid rcv buf size\n");
    315 			break;
    316 		case 'm':
    317 			strncpy(cpumask, optarg, sizeof(cpumask));
    318 			maskset = 1;
    319 			printf("cpumask %s maskset %d\n", cpumask, maskset);
    320 			break;
    321 		case 't':
    322 			tid = atoi(optarg);
    323 			if (!tid)
    324 				err(1, "Invalid tgid\n");
    325 			cmd_type = TASKSTATS_CMD_ATTR_TGID;
    326 			break;
    327 		case 'p':
    328 			tid = atoi(optarg);
    329 			if (!tid)
    330 				err(1, "Invalid pid\n");
    331 			cmd_type = TASKSTATS_CMD_ATTR_PID;
    332 			break;
    333 		case 'v':
    334 			printf("debug on\n");
    335 			dbg = 1;
    336 			break;
    337 		case 'l':
    338 			printf("listen forever\n");
    339 			loop = 1;
    340 			break;
    341 		default:
    342 			usage();
    343 			exit(1);
    344 		}
    345 	}
    346 
    347 	if (write_file) {
    348 		fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
    349 			  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    350 		if (fd == -1) {
    351 			perror("Cannot open output file\n");
    352 			exit(1);
    353 		}
    354 	}
    355 
    356 	if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0)
    357 		err(1, "error creating Netlink socket\n");
    358 
    359 	mypid = getpid();
    360 	id = get_family_id(nl_sd);
    361 	if (!id) {
    362 		fprintf(stderr, "Error getting family id, errno %d\n", errno);
    363 		exit(1);
    364 	}
    365 	PRINTF("family id %d\n", id);
    366 
    367 	if (maskset) {
    368 		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
    369 			      TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
    370 			      &cpumask, strlen(cpumask) + 1);
    371 		PRINTF("Sent register cpumask, retval %d\n", rc);
    372 		if (rc < 0) {
    373 			fprintf(stderr, "error sending register cpumask\n");
    374 			exit(1);
    375 		}
    376 	}
    377 
    378 	if (tid && containerset) {
    379 		fprintf(stderr, "Select either -t or -C, not both\n");
    380 		exit(1);
    381 	}
    382 
    383 	if (tid) {
    384 		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
    385 			      cmd_type, &tid, sizeof(__u32));
    386 		PRINTF("Sent pid/tgid, retval %d\n", rc);
    387 		if (rc < 0) {
    388 			fprintf(stderr, "error sending tid/tgid cmd\n");
    389 			exit(1);
    390 		}
    391 	}
    392 
    393 	if (containerset) {
    394 		cfd = open(containerpath, O_RDONLY);
    395 		if (cfd < 0) {
    396 			perror("error opening container file");
    397 			exit(1);
    398 		}
    399 #ifdef HAVE_LINUX_CGROUPSTATS_H
    400 		rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
    401 			      CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
    402 #else
    403 		errno = ENOSYS;
    404 		rc = -1;
    405 #endif
    406 		if (rc < 0) {
    407 			perror("error sending cgroupstats command");
    408 			exit(1);
    409 		}
    410 	}
    411 	if (!maskset && !tid && !containerset) {
    412 		usage();
    413 		exit(1);
    414 	}
    415 
    416 	do {
    417 		int i;
    418 
    419 		rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
    420 		PRINTF("received %d bytes\n", rep_len);
    421 
    422 		if (rep_len < 0) {
    423 			fprintf(stderr, "nonfatal reply error: errno %d\n",
    424 				errno);
    425 			exit(1);
    426 		}
    427 		if (msg.n.nlmsg_type == NLMSG_ERROR ||
    428 		    !NLMSG_OK((&msg.n), rep_len)) {
    429 			struct nlmsgerr *err = NLMSG_DATA(&msg);
    430 			fprintf(stderr, "fatal reply error,  errno %d\n",
    431 				err->error);
    432 			exit(1);
    433 		}
    434 
    435 		PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
    436 		       sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
    437 
    438 		rep_len = GENLMSG_PAYLOAD(&msg.n);
    439 
    440 		na = (struct nlattr *)GENLMSG_DATA(&msg);
    441 		len = 0;
    442 		i = 0;
    443 		while (len < rep_len) {
    444 			len += NLA_ALIGN(na->nla_len);
    445 			switch (na->nla_type) {
    446 			case TASKSTATS_TYPE_AGGR_TGID:
    447 				/* Fall through */
    448 			case TASKSTATS_TYPE_AGGR_PID:
    449 				aggr_len = NLA_PAYLOAD(na->nla_len);
    450 				len2 = 0;
    451 				/* For nested attributes, na follows */
    452 				na = (struct nlattr *)NLA_DATA(na);
    453 				done = 0;
    454 				while (len2 < aggr_len) {
    455 					switch (na->nla_type) {
    456 					case TASKSTATS_TYPE_PID:
    457 						rtid = *(int *)NLA_DATA(na);
    458 						if (print_delays)
    459 							printf("PID\t%d\n",
    460 							       rtid);
    461 						break;
    462 					case TASKSTATS_TYPE_TGID:
    463 						rtid = *(int *)NLA_DATA(na);
    464 						if (print_delays)
    465 							printf("TGID\t%d\n",
    466 							       rtid);
    467 						break;
    468 					case TASKSTATS_TYPE_STATS:
    469 						count++;
    470 						if (print_delays)
    471 							print_delayacct((struct
    472 									 taskstats
    473 									 *)
    474 									NLA_DATA
    475 									(na));
    476 						if (print_io_accounting)
    477 							print_ioacct((struct
    478 								      taskstats
    479 								      *)
    480 								     NLA_DATA
    481 								     (na));
    482 						if (print_task_context_switch_counts)
    483 							task_context_switch_counts
    484 							    ((struct taskstats
    485 							      *)NLA_DATA(na));
    486 						if (fd) {
    487 							if (write
    488 							    (fd, NLA_DATA(na),
    489 							     na->nla_len) < 0) {
    490 								err(1,
    491 								    "write error\n");
    492 							}
    493 						}
    494 						if (!loop)
    495 							goto done;
    496 						break;
    497 					default:
    498 						fprintf(stderr, "Unknown nested"
    499 							" nla_type %d\n",
    500 							na->nla_type);
    501 						break;
    502 					}
    503 					len2 += NLA_ALIGN(na->nla_len);
    504 					na = (struct nlattr *)((char *)na +
    505 							       len2);
    506 				}
    507 				break;
    508 #if HAVE_LINUX_CGROUPSTATS_H
    509 			case CGROUPSTATS_TYPE_CGROUP_STATS:
    510 				print_cgroupstats(NLA_DATA(na));
    511 				break;
    512 #endif
    513 			default:
    514 				fprintf(stderr, "Unknown nla_type %d\n",
    515 					na->nla_type);
    516 				exit(1);
    517 			}
    518 			na = (struct nlattr *)(GENLMSG_DATA(&msg) + len);
    519 		}
    520 	} while (loop);
    521 done:
    522 	if (maskset) {
    523 		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
    524 			      TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
    525 			      &cpumask, strlen(cpumask) + 1);
    526 		printf("Sent deregister mask, retval %d\n", rc);
    527 		if (rc < 0)
    528 			err(rc, "error sending deregister cpumask\n");
    529 	}
    530 
    531 	close(nl_sd);
    532 	if (fd)
    533 		close(fd);
    534 	if (cfd)
    535 		close(cfd);
    536 	return 0;
    537 }
    538 #else
    539 int main(void)
    540 {
    541 	printf("System doesn't have needed netlink / taskstats support.\n");
    542 	return 1;
    543 }
    544 #endif
    545