Home | History | Annotate | Download | only in ip
      1 /*
      2  * ipvrf.c	"ip vrf"
      3  *
      4  *		This program is free software; you can redistribute it and/or
      5  *		modify it under the terms of the GNU General Public License
      6  *		as published by the Free Software Foundation; either version
      7  *		2 of the License, or (at your option) any later version.
      8  *
      9  * Authors:	David Ahern <dsa (at) cumulusnetworks.com>
     10  *
     11  */
     12 
     13 #include <sys/types.h>
     14 #include <sys/stat.h>
     15 #include <sys/socket.h>
     16 #include <sys/mount.h>
     17 #include <linux/bpf.h>
     18 #include <linux/if.h>
     19 #include <fcntl.h>
     20 #include <stdio.h>
     21 #include <stdlib.h>
     22 #include <unistd.h>
     23 #include <string.h>
     24 #include <dirent.h>
     25 #include <errno.h>
     26 #include <limits.h>
     27 
     28 #include "rt_names.h"
     29 #include "utils.h"
     30 #include "ip_common.h"
     31 #include "bpf_util.h"
     32 
     33 #define CGRP_PROC_FILE  "/cgroup.procs"
     34 
     35 static struct link_filter vrf_filter;
     36 
     37 static void usage(void)
     38 {
     39 	fprintf(stderr, "Usage: ip vrf show [NAME] ...\n");
     40 	fprintf(stderr, "       ip vrf exec [NAME] cmd ...\n");
     41 	fprintf(stderr, "       ip vrf identify [PID]\n");
     42 	fprintf(stderr, "       ip vrf pids [NAME]\n");
     43 
     44 	exit(-1);
     45 }
     46 
     47 /*
     48  * parse process based cgroup file looking for PATH/vrf/NAME where
     49  * NAME is the name of the vrf the process is associated with
     50  */
     51 static int vrf_identify(pid_t pid, char *name, size_t len)
     52 {
     53 	char path[PATH_MAX];
     54 	char buf[4096];
     55 	char *vrf, *end;
     56 	FILE *fp;
     57 
     58 	snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
     59 	fp = fopen(path, "r");
     60 	if (!fp)
     61 		return -1;
     62 
     63 	memset(name, 0, len);
     64 
     65 	while (fgets(buf, sizeof(buf), fp)) {
     66 		/* want the controller-less cgroup */
     67 		if (strstr(buf, "::/") == NULL)
     68 			continue;
     69 
     70 		vrf = strstr(buf, "/vrf/");
     71 		if (vrf) {
     72 			vrf += 5;  /* skip past "/vrf/" */
     73 			end = strchr(vrf, '\n');
     74 			if (end)
     75 				*end = '\0';
     76 
     77 			strlcpy(name, vrf, len);
     78 			break;
     79 		}
     80 	}
     81 
     82 	fclose(fp);
     83 
     84 	return 0;
     85 }
     86 
     87 static int ipvrf_identify(int argc, char **argv)
     88 {
     89 	char vrf[32];
     90 	int rc;
     91 	unsigned int pid;
     92 
     93 	if (argc < 1)
     94 		pid = getpid();
     95 	else if (argc > 1)
     96 		invarg("Extra arguments specified\n", argv[1]);
     97 	else if (get_unsigned(&pid, argv[0], 10))
     98 		invarg("Invalid pid\n", argv[0]);
     99 
    100 	rc = vrf_identify(pid, vrf, sizeof(vrf));
    101 	if (!rc) {
    102 		if (vrf[0] != '\0')
    103 			printf("%s\n", vrf);
    104 	} else {
    105 		fprintf(stderr, "Failed to lookup vrf association: %s\n",
    106 			strerror(errno));
    107 	}
    108 
    109 	return rc;
    110 }
    111 
    112 /* read PATH/vrf/NAME/cgroup.procs file */
    113 static void read_cgroup_pids(const char *base_path, char *name)
    114 {
    115 	char path[PATH_MAX];
    116 	char buf[4096];
    117 	FILE *fp;
    118 
    119 	if (snprintf(path, sizeof(path), "%s/vrf/%s%s",
    120 		     base_path, name, CGRP_PROC_FILE) >= sizeof(path))
    121 		return;
    122 
    123 	fp = fopen(path, "r");
    124 	if (!fp)
    125 		return; /* no cgroup file, nothing to show */
    126 
    127 	/* dump contents (pids) of cgroup.procs */
    128 	while (fgets(buf, sizeof(buf), fp)) {
    129 		char *nl, comm[32];
    130 
    131 		nl = strchr(buf, '\n');
    132 		if (nl)
    133 			*nl = '\0';
    134 
    135 		if (get_command_name(buf, comm, sizeof(comm)))
    136 			strcpy(comm, "<terminated?>");
    137 
    138 		printf("%5s  %s\n", buf, comm);
    139 	}
    140 
    141 	fclose(fp);
    142 }
    143 
    144 /* recurse path looking for PATH[/NETNS]/vrf/NAME */
    145 static int recurse_dir(char *base_path, char *name, const char *netns)
    146 {
    147 	char path[PATH_MAX];
    148 	struct dirent *de;
    149 	struct stat fstat;
    150 	int rc;
    151 	DIR *d;
    152 
    153 	d = opendir(base_path);
    154 	if (!d)
    155 		return -1;
    156 
    157 	while ((de = readdir(d)) != NULL) {
    158 		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
    159 			continue;
    160 
    161 		if (!strcmp(de->d_name, "vrf")) {
    162 			const char *pdir = strrchr(base_path, '/');
    163 
    164 			/* found a 'vrf' directory. if it is for the given
    165 			 * namespace then dump the cgroup pids
    166 			 */
    167 			if (*netns == '\0' ||
    168 			    (pdir && !strcmp(pdir+1, netns)))
    169 				read_cgroup_pids(base_path, name);
    170 
    171 			continue;
    172 		}
    173 
    174 		/* is this a subdir that needs to be walked */
    175 		if (snprintf(path, sizeof(path), "%s/%s",
    176 			     base_path, de->d_name) >= sizeof(path))
    177 			continue;
    178 
    179 		if (lstat(path, &fstat) < 0)
    180 			continue;
    181 
    182 		if (S_ISDIR(fstat.st_mode)) {
    183 			rc = recurse_dir(path, name, netns);
    184 			if (rc != 0)
    185 				goto out;
    186 		}
    187 	}
    188 
    189 	rc = 0;
    190 out:
    191 	closedir(d);
    192 
    193 	return rc;
    194 }
    195 
    196 static int ipvrf_get_netns(char *netns, int len)
    197 {
    198 	if (netns_identify_pid("self", netns, len-3)) {
    199 		fprintf(stderr, "Failed to get name of network namespace: %s\n",
    200 			strerror(errno));
    201 		return -1;
    202 	}
    203 
    204 	if (*netns != '\0')
    205 		strcat(netns, "-ns");
    206 
    207 	return 0;
    208 }
    209 
    210 static int ipvrf_pids(int argc, char **argv)
    211 {
    212 	char *mnt, *vrf;
    213 	char netns[256];
    214 	int ret = -1;
    215 
    216 	if (argc != 1) {
    217 		fprintf(stderr, "Invalid arguments\n");
    218 		return -1;
    219 	}
    220 
    221 	vrf = argv[0];
    222 	if (!name_is_vrf(vrf)) {
    223 		fprintf(stderr, "Invalid VRF name\n");
    224 		return -1;
    225 	}
    226 
    227 	mnt = find_cgroup2_mount();
    228 	if (!mnt)
    229 		return -1;
    230 
    231 	if (ipvrf_get_netns(netns, sizeof(netns)) < 0)
    232 		goto out;
    233 
    234 	ret = recurse_dir(mnt, vrf, netns);
    235 
    236 out:
    237 	free(mnt);
    238 
    239 	return ret;
    240 }
    241 
/* log buffer handed to bpf_prog_load() by prog_load() below */
static char bpf_log_buf[256*1024];

/*
 * load BPF program to set sk_bound_dev_if for sockets
 *
 * Builds a six-instruction BPF_PROG_TYPE_CGROUP_SOCK program with "GPL"
 * as its license string and passes it to bpf_prog_load(). idx is the
 * value the program stores into the bound_dev_if field.
 *
 * Returns whatever bpf_prog_load() returns; the caller in this file
 * treats a negative value as failure.
 */
static int prog_load(int idx)
{
	/* NOTE(review): r1 is presumably the struct bpf_sock context the
	 * kernel passes to the program - confirm against the BPF docs.
	 * r6 and r2 are written but not read by any later instruction.
	 */
	struct bpf_insn prog[] = {
		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
		BPF_MOV64_IMM(BPF_REG_3, idx),
		BPF_MOV64_IMM(BPF_REG_2,
			      offsetof(struct bpf_sock, bound_dev_if)),
		/* 32-bit store of r3 (idx) at r1 + offset of bound_dev_if */
		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
			    offsetof(struct bpf_sock, bound_dev_if)),
		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
		BPF_EXIT_INSN(),
	};

	return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
			     "GPL", bpf_log_buf, sizeof(bpf_log_buf));
}
    261 
    262 static int vrf_configure_cgroup(const char *path, int ifindex)
    263 {
    264 	int rc = -1, cg_fd, prog_fd = -1;
    265 
    266 	cg_fd = open(path, O_DIRECTORY | O_RDONLY);
    267 	if (cg_fd < 0) {
    268 		fprintf(stderr,
    269 			"Failed to open cgroup path: '%s'\n",
    270 			strerror(errno));
    271 		goto out;
    272 	}
    273 
    274 	/*
    275 	 * Load bpf program into kernel and attach to cgroup to affect
    276 	 * socket creates
    277 	 */
    278 	prog_fd = prog_load(ifindex);
    279 	if (prog_fd < 0) {
    280 		fprintf(stderr, "Failed to load BPF prog: '%s'\n",
    281 			strerror(errno));
    282 
    283 		if (errno != EPERM) {
    284 			fprintf(stderr,
    285 				"Kernel compiled with CGROUP_BPF enabled?\n");
    286 		}
    287 		goto out;
    288 	}
    289 
    290 	if (bpf_prog_attach_fd(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) {
    291 		fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n",
    292 			strerror(errno));
    293 		goto out;
    294 	}
    295 
    296 	rc = 0;
    297 out:
    298 	close(cg_fd);
    299 	close(prog_fd);
    300 
    301 	return rc;
    302 }
    303 
    304 /* get base path for controller-less cgroup for a process.
    305  * path returned does not include /vrf/NAME if it exists
    306  */
    307 static int vrf_path(char *vpath, size_t len)
    308 {
    309 	char path[PATH_MAX];
    310 	char buf[4096];
    311 	char *vrf;
    312 	FILE *fp;
    313 
    314 	snprintf(path, sizeof(path), "/proc/%d/cgroup", getpid());
    315 	fp = fopen(path, "r");
    316 	if (!fp)
    317 		return -1;
    318 
    319 	vpath[0] = '\0';
    320 
    321 	while (fgets(buf, sizeof(buf), fp)) {
    322 		char *start, *nl;
    323 
    324 		start = strstr(buf, "::/");
    325 		if (!start)
    326 			continue;
    327 
    328 		/* advance past '::' */
    329 		start += 2;
    330 
    331 		nl = strchr(start, '\n');
    332 		if (nl)
    333 			*nl = '\0';
    334 
    335 		vrf = strstr(start, "/vrf");
    336 		if (vrf)
    337 			*vrf = '\0';
    338 
    339 		strlcpy(vpath, start, len);
    340 
    341 		/* if vrf path is just / then return nothing */
    342 		if (!strcmp(vpath, "/"))
    343 			vpath[0] = '\0';
    344 
    345 		break;
    346 	}
    347 
    348 	fclose(fp);
    349 
    350 	return 0;
    351 }
    352 
    353 static int vrf_switch(const char *name)
    354 {
    355 	char path[PATH_MAX], *mnt, pid[16];
    356 	char vpath[PATH_MAX], netns[256];
    357 	int ifindex = 0;
    358 	int rc = -1, len, fd = -1;
    359 
    360 	if (strcmp(name, "default")) {
    361 		ifindex = name_is_vrf(name);
    362 		if (!ifindex) {
    363 			fprintf(stderr, "Invalid VRF name\n");
    364 			return -1;
    365 		}
    366 	}
    367 
    368 	mnt = find_cgroup2_mount();
    369 	if (!mnt)
    370 		return -1;
    371 
    372 	/* -1 on length to add '/' to the end */
    373 	if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0)
    374 		goto out;
    375 
    376 	if (vrf_path(vpath, sizeof(vpath)) < 0) {
    377 		fprintf(stderr, "Failed to get base cgroup path: %s\n",
    378 			strerror(errno));
    379 		goto out;
    380 	}
    381 
    382 	/* if path already ends in netns then don't add it again */
    383 	if (*netns != '\0') {
    384 		char *pdir = strrchr(vpath, '/');
    385 
    386 		if (!pdir)
    387 			pdir = vpath;
    388 		else
    389 			pdir++;
    390 
    391 		if (strcmp(pdir, netns) == 0)
    392 			*pdir = '\0';
    393 
    394 		strcat(netns, "/");
    395 	}
    396 
    397 	/* path to cgroup; make sure buffer has room to cat "/cgroup.procs"
    398 	 * to the end of the path
    399 	 */
    400 	len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE),
    401 		       "%s%s/%svrf/%s",
    402 		       mnt, vpath, netns, ifindex ? name : "");
    403 	if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) {
    404 		fprintf(stderr, "Invalid path to cgroup2 mount\n");
    405 		goto out;
    406 	}
    407 
    408 	if (make_path(path, 0755)) {
    409 		fprintf(stderr, "Failed to setup vrf cgroup2 directory\n");
    410 		goto out;
    411 	}
    412 
    413 	if (ifindex && vrf_configure_cgroup(path, ifindex))
    414 		goto out;
    415 
    416 	/*
    417 	 * write pid to cgroup.procs making process part of cgroup
    418 	 */
    419 	strcat(path, CGRP_PROC_FILE);
    420 	fd = open(path, O_RDWR | O_APPEND);
    421 	if (fd < 0) {
    422 		fprintf(stderr, "Failed to open cgroups.procs file: %s.\n",
    423 			strerror(errno));
    424 		goto out;
    425 	}
    426 
    427 	snprintf(pid, sizeof(pid), "%d", getpid());
    428 	if (write(fd, pid, strlen(pid)) < 0) {
    429 		fprintf(stderr, "Failed to join cgroup\n");
    430 		goto out2;
    431 	}
    432 
    433 	rc = 0;
    434 out2:
    435 	close(fd);
    436 out:
    437 	free(mnt);
    438 
    439 	return rc;
    440 }
    441 
    442 static int ipvrf_exec(int argc, char **argv)
    443 {
    444 	if (argc < 1) {
    445 		fprintf(stderr, "No VRF name specified\n");
    446 		return -1;
    447 	}
    448 	if (argc < 2) {
    449 		fprintf(stderr, "No command specified\n");
    450 		return -1;
    451 	}
    452 
    453 	if (vrf_switch(argv[0]))
    454 		return -1;
    455 
    456 	return -cmd_exec(argv[1], argv + 1, !!batch_mode);
    457 }
    458 
    459 /* reset VRF association of current process to default VRF;
    460  * used by netns_exec
    461  */
    462 void vrf_reset(void)
    463 {
    464 	char vrf[32];
    465 
    466 	if (vrf_identify(getpid(), vrf, sizeof(vrf)) ||
    467 	    (vrf[0] == '\0'))
    468 		return;
    469 
    470 	vrf_switch("default");
    471 }
    472 
    473 static int ipvrf_filter_req(struct nlmsghdr *nlh, int reqlen)
    474 {
    475 	struct rtattr *linkinfo;
    476 	int err;
    477 
    478 	if (vrf_filter.kind) {
    479 		linkinfo = addattr_nest(nlh, reqlen, IFLA_LINKINFO);
    480 
    481 		err = addattr_l(nlh, reqlen, IFLA_INFO_KIND, vrf_filter.kind,
    482 				strlen(vrf_filter.kind));
    483 		if (err)
    484 			return err;
    485 
    486 		addattr_nest_end(nlh, linkinfo);
    487 	}
    488 
    489 	return 0;
    490 }
    491 
    492 /* input arg is linkinfo */
    493 static __u32 vrf_table_linkinfo(struct rtattr *li[])
    494 {
    495 	struct rtattr *attr[IFLA_VRF_MAX + 1];
    496 
    497 	if (li[IFLA_INFO_DATA]) {
    498 		parse_rtattr_nested(attr, IFLA_VRF_MAX, li[IFLA_INFO_DATA]);
    499 
    500 		if (attr[IFLA_VRF_TABLE])
    501 			return rta_getattr_u32(attr[IFLA_VRF_TABLE]);
    502 	}
    503 
    504 	return 0;
    505 }
    506 
    507 static int ipvrf_print(struct nlmsghdr *n)
    508 {
    509 	struct ifinfomsg *ifi = NLMSG_DATA(n);
    510 	struct rtattr *tb[IFLA_MAX+1];
    511 	struct rtattr *li[IFLA_INFO_MAX+1];
    512 	int len = n->nlmsg_len;
    513 	const char *name;
    514 	__u32 tb_id;
    515 
    516 	len -= NLMSG_LENGTH(sizeof(*ifi));
    517 	if (len < 0)
    518 		return 0;
    519 
    520 	if (vrf_filter.ifindex && vrf_filter.ifindex != ifi->ifi_index)
    521 		return 0;
    522 
    523 	parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
    524 
    525 	/* kernel does not support filter by master device */
    526 	if (tb[IFLA_MASTER]) {
    527 		int master = *(int *)RTA_DATA(tb[IFLA_MASTER]);
    528 
    529 		if (vrf_filter.master && master != vrf_filter.master)
    530 			return 0;
    531 	}
    532 
    533 	if (!tb[IFLA_IFNAME]) {
    534 		fprintf(stderr,
    535 			"BUG: device with ifindex %d has nil ifname\n",
    536 			ifi->ifi_index);
    537 		return 0;
    538 	}
    539 	name = rta_getattr_str(tb[IFLA_IFNAME]);
    540 
    541 	/* missing LINKINFO means not VRF. e.g., kernel does not
    542 	 * support filtering on kind, so userspace needs to handle
    543 	 */
    544 	if (!tb[IFLA_LINKINFO])
    545 		return 0;
    546 
    547 	parse_rtattr_nested(li, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);
    548 
    549 	if (!li[IFLA_INFO_KIND])
    550 		return 0;
    551 
    552 	if (strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf"))
    553 		return 0;
    554 
    555 	tb_id = vrf_table_linkinfo(li);
    556 	if (!tb_id) {
    557 		fprintf(stderr,
    558 			"BUG: VRF %s is missing table id\n", name);
    559 		return 0;
    560 	}
    561 
    562 	printf("%-16s %5u", name, tb_id);
    563 
    564 	printf("\n");
    565 	return 1;
    566 }
    567 
    568 static int ipvrf_show(int argc, char **argv)
    569 {
    570 	struct nlmsg_chain linfo = { NULL, NULL};
    571 	int rc = 0;
    572 
    573 	vrf_filter.kind = "vrf";
    574 
    575 	if (argc > 1)
    576 		usage();
    577 
    578 	if (argc == 1) {
    579 		__u32 tb_id;
    580 
    581 		tb_id = ipvrf_get_table(argv[0]);
    582 		if (!tb_id) {
    583 			fprintf(stderr, "Invalid VRF\n");
    584 			return 1;
    585 		}
    586 		printf("%s %u\n", argv[0], tb_id);
    587 		return 0;
    588 	}
    589 
    590 	if (ip_linkaddr_list(0, ipvrf_filter_req, &linfo, NULL) == 0) {
    591 		struct nlmsg_list *l;
    592 		unsigned nvrf = 0;
    593 		int n;
    594 
    595 		n = printf("%-16s  %5s\n", "Name", "Table");
    596 		printf("%.*s\n", n-1, "-----------------------");
    597 		for (l = linfo.head; l; l = l->next)
    598 			nvrf += ipvrf_print(&l->h);
    599 
    600 		if (!nvrf)
    601 			printf("No VRF has been configured\n");
    602 	} else
    603 		rc = 1;
    604 
    605 	free_nlmsg_chain(&linfo);
    606 
    607 	return rc;
    608 }
    609 
    610 int do_ipvrf(int argc, char **argv)
    611 {
    612 	if (argc == 0)
    613 		return ipvrf_show(0, NULL);
    614 
    615 	if (matches(*argv, "identify") == 0)
    616 		return ipvrf_identify(argc-1, argv+1);
    617 
    618 	if (matches(*argv, "pids") == 0)
    619 		return ipvrf_pids(argc-1, argv+1);
    620 
    621 	if (matches(*argv, "exec") == 0)
    622 		return ipvrf_exec(argc-1, argv+1);
    623 
    624 	if (matches(*argv, "show") == 0 ||
    625 	    matches(*argv, "lst") == 0 ||
    626 	    matches(*argv, "list") == 0)
    627 		return ipvrf_show(argc-1, argv+1);
    628 
    629 	if (matches(*argv, "help") == 0)
    630 		usage();
    631 
    632 	fprintf(stderr, "Command \"%s\" is unknown, try \"ip vrf help\".\n",
    633 		*argv);
    634 
    635 	exit(-1);
    636 }
    637