1 /* 2 * ipvrf.c "ip vrf" 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: David Ahern <dsa (at) cumulusnetworks.com> 10 * 11 */ 12 13 #include <sys/types.h> 14 #include <sys/stat.h> 15 #include <sys/socket.h> 16 #include <sys/mount.h> 17 #include <linux/bpf.h> 18 #include <linux/if.h> 19 #include <fcntl.h> 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <unistd.h> 23 #include <string.h> 24 #include <dirent.h> 25 #include <errno.h> 26 #include <limits.h> 27 28 #include "rt_names.h" 29 #include "utils.h" 30 #include "ip_common.h" 31 #include "bpf_util.h" 32 33 #define CGRP_PROC_FILE "/cgroup.procs" 34 35 static struct link_filter vrf_filter; 36 37 static void usage(void) 38 { 39 fprintf(stderr, "Usage: ip vrf show [NAME] ...\n"); 40 fprintf(stderr, " ip vrf exec [NAME] cmd ...\n"); 41 fprintf(stderr, " ip vrf identify [PID]\n"); 42 fprintf(stderr, " ip vrf pids [NAME]\n"); 43 44 exit(-1); 45 } 46 47 /* 48 * parse process based cgroup file looking for PATH/vrf/NAME where 49 * NAME is the name of the vrf the process is associated with 50 */ 51 static int vrf_identify(pid_t pid, char *name, size_t len) 52 { 53 char path[PATH_MAX]; 54 char buf[4096]; 55 char *vrf, *end; 56 FILE *fp; 57 58 snprintf(path, sizeof(path), "/proc/%d/cgroup", pid); 59 fp = fopen(path, "r"); 60 if (!fp) 61 return -1; 62 63 memset(name, 0, len); 64 65 while (fgets(buf, sizeof(buf), fp)) { 66 /* want the controller-less cgroup */ 67 if (strstr(buf, "::/") == NULL) 68 continue; 69 70 vrf = strstr(buf, "/vrf/"); 71 if (vrf) { 72 vrf += 5; /* skip past "/vrf/" */ 73 end = strchr(vrf, '\n'); 74 if (end) 75 *end = '\0'; 76 77 strlcpy(name, vrf, len); 78 break; 79 } 80 } 81 82 fclose(fp); 83 84 return 0; 85 } 86 87 static int ipvrf_identify(int argc, char **argv) 88 { 89 char vrf[32]; 90 int rc; 91 unsigned int pid; 92 93 if (argc < 1) 94 pid = getpid(); 95 else if (argc > 1) 96 invarg("Extra arguments specified\n", argv[1]); 97 else if (get_unsigned(&pid, argv[0], 10)) 98 invarg("Invalid pid\n", argv[0]); 99 100 rc = vrf_identify(pid, vrf, sizeof(vrf)); 101 if (!rc) { 102 if (vrf[0] != '\0') 103 printf("%s\n", vrf); 104 } else { 105 fprintf(stderr, "Failed to lookup vrf association: %s\n", 106 strerror(errno)); 107 } 108 109 return rc; 110 } 111 112 /* read PATH/vrf/NAME/cgroup.procs file */ 113 static void read_cgroup_pids(const char *base_path, char *name) 114 { 115 char path[PATH_MAX]; 116 char buf[4096]; 117 FILE *fp; 118 119 if (snprintf(path, sizeof(path), "%s/vrf/%s%s", 120 base_path, name, CGRP_PROC_FILE) >= sizeof(path)) 121 return; 122 123 fp = fopen(path, "r"); 124 if (!fp) 125 return; /* no cgroup file, nothing to show */ 126 127 /* dump contents (pids) of cgroup.procs */ 128 while (fgets(buf, sizeof(buf), fp)) { 129 char *nl, comm[32]; 130 131 nl = strchr(buf, '\n'); 132 if (nl) 133 *nl = '\0'; 134 135 if (get_command_name(buf, comm, sizeof(comm))) 136 strcpy(comm, "<terminated?>"); 137 138 printf("%5s %s\n", buf, comm); 139 } 140 141 fclose(fp); 142 } 143 144 /* recurse path looking for PATH[/NETNS]/vrf/NAME */ 145 static int recurse_dir(char *base_path, char *name, const char *netns) 146 { 147 char path[PATH_MAX]; 148 struct dirent *de; 149 struct stat fstat; 150 int rc; 151 DIR *d; 152 153 d = opendir(base_path); 154 if (!d) 155 return -1; 156 157 while ((de = readdir(d)) != NULL) { 158 if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) 159 continue; 160 161 if (!strcmp(de->d_name, "vrf")) { 162 const char *pdir = strrchr(base_path, '/'); 163 164 /* found a 'vrf' directory. if it is for the given 165 * namespace then dump the cgroup pids 166 */ 167 if (*netns == '\0' || 168 (pdir && !strcmp(pdir+1, netns))) 169 read_cgroup_pids(base_path, name); 170 171 continue; 172 } 173 174 /* is this a subdir that needs to be walked */ 175 if (snprintf(path, sizeof(path), "%s/%s", 176 base_path, de->d_name) >= sizeof(path)) 177 continue; 178 179 if (lstat(path, &fstat) < 0) 180 continue; 181 182 if (S_ISDIR(fstat.st_mode)) { 183 rc = recurse_dir(path, name, netns); 184 if (rc != 0) 185 goto out; 186 } 187 } 188 189 rc = 0; 190 out: 191 closedir(d); 192 193 return rc; 194 } 195 196 static int ipvrf_get_netns(char *netns, int len) 197 { 198 if (netns_identify_pid("self", netns, len-3)) { 199 fprintf(stderr, "Failed to get name of network namespace: %s\n", 200 strerror(errno)); 201 return -1; 202 } 203 204 if (*netns != '\0') 205 strcat(netns, "-ns"); 206 207 return 0; 208 } 209 210 static int ipvrf_pids(int argc, char **argv) 211 { 212 char *mnt, *vrf; 213 char netns[256]; 214 int ret = -1; 215 216 if (argc != 1) { 217 fprintf(stderr, "Invalid arguments\n"); 218 return -1; 219 } 220 221 vrf = argv[0]; 222 if (!name_is_vrf(vrf)) { 223 fprintf(stderr, "Invalid VRF name\n"); 224 return -1; 225 } 226 227 mnt = find_cgroup2_mount(); 228 if (!mnt) 229 return -1; 230 231 if (ipvrf_get_netns(netns, sizeof(netns)) < 0) 232 goto out; 233 234 ret = recurse_dir(mnt, vrf, netns); 235 236 out: 237 free(mnt); 238 239 return ret; 240 } 241 242 /* load BPF program to set sk_bound_dev_if for sockets */ 243 static char bpf_log_buf[256*1024]; 244 245 static int prog_load(int idx) 246 { 247 struct bpf_insn prog[] = { 248 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), 249 BPF_MOV64_IMM(BPF_REG_3, idx), 250 BPF_MOV64_IMM(BPF_REG_2, 251 offsetof(struct bpf_sock, bound_dev_if)), 252 BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, 253 offsetof(struct bpf_sock, bound_dev_if)), 254 BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ 255 BPF_EXIT_INSN(), 256 }; 257 258 return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), 259 "GPL", bpf_log_buf, sizeof(bpf_log_buf)); 260 } 261 262 static int vrf_configure_cgroup(const char *path, int ifindex) 263 { 264 int rc = -1, cg_fd, prog_fd = -1; 265 266 cg_fd = open(path, O_DIRECTORY | O_RDONLY); 267 if (cg_fd < 0) { 268 fprintf(stderr, 269 "Failed to open cgroup path: '%s'\n", 270 strerror(errno)); 271 goto out; 272 } 273 274 /* 275 * Load bpf program into kernel and attach to cgroup to affect 276 * socket creates 277 */ 278 prog_fd = prog_load(ifindex); 279 if (prog_fd < 0) { 280 fprintf(stderr, "Failed to load BPF prog: '%s'\n", 281 strerror(errno)); 282 283 if (errno != EPERM) { 284 fprintf(stderr, 285 "Kernel compiled with CGROUP_BPF enabled?\n"); 286 } 287 goto out; 288 } 289 290 if (bpf_prog_attach_fd(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) { 291 fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n", 292 strerror(errno)); 293 goto out; 294 } 295 296 rc = 0; 297 out: 298 close(cg_fd); 299 close(prog_fd); 300 301 return rc; 302 } 303 304 /* get base path for controller-less cgroup for a process. 305 * path returned does not include /vrf/NAME if it exists 306 */ 307 static int vrf_path(char *vpath, size_t len) 308 { 309 char path[PATH_MAX]; 310 char buf[4096]; 311 char *vrf; 312 FILE *fp; 313 314 snprintf(path, sizeof(path), "/proc/%d/cgroup", getpid()); 315 fp = fopen(path, "r"); 316 if (!fp) 317 return -1; 318 319 vpath[0] = '\0'; 320 321 while (fgets(buf, sizeof(buf), fp)) { 322 char *start, *nl; 323 324 start = strstr(buf, "::/"); 325 if (!start) 326 continue; 327 328 /* advance past '::' */ 329 start += 2; 330 331 nl = strchr(start, '\n'); 332 if (nl) 333 *nl = '\0'; 334 335 vrf = strstr(start, "/vrf"); 336 if (vrf) 337 *vrf = '\0'; 338 339 strlcpy(vpath, start, len); 340 341 /* if vrf path is just / then return nothing */ 342 if (!strcmp(vpath, "/")) 343 vpath[0] = '\0'; 344 345 break; 346 } 347 348 fclose(fp); 349 350 return 0; 351 } 352 353 static int vrf_switch(const char *name) 354 { 355 char path[PATH_MAX], *mnt, pid[16]; 356 char vpath[PATH_MAX], netns[256]; 357 int ifindex = 0; 358 int rc = -1, len, fd = -1; 359 360 if (strcmp(name, "default")) { 361 ifindex = name_is_vrf(name); 362 if (!ifindex) { 363 fprintf(stderr, "Invalid VRF name\n"); 364 return -1; 365 } 366 } 367 368 mnt = find_cgroup2_mount(); 369 if (!mnt) 370 return -1; 371 372 /* -1 on length to add '/' to the end */ 373 if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0) 374 goto out; 375 376 if (vrf_path(vpath, sizeof(vpath)) < 0) { 377 fprintf(stderr, "Failed to get base cgroup path: %s\n", 378 strerror(errno)); 379 goto out; 380 } 381 382 /* if path already ends in netns then don't add it again */ 383 if (*netns != '\0') { 384 char *pdir = strrchr(vpath, '/'); 385 386 if (!pdir) 387 pdir = vpath; 388 else 389 pdir++; 390 391 if (strcmp(pdir, netns) == 0) 392 *pdir = '\0'; 393 394 strcat(netns, "/"); 395 } 396 397 /* path to cgroup; make sure buffer has room to cat "/cgroup.procs" 398 * to the end of the path 399 */ 400 len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE), 401 "%s%s/%svrf/%s", 402 mnt, vpath, netns, ifindex ? name : ""); 403 if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) { 404 fprintf(stderr, "Invalid path to cgroup2 mount\n"); 405 goto out; 406 } 407 408 if (make_path(path, 0755)) { 409 fprintf(stderr, "Failed to setup vrf cgroup2 directory\n"); 410 goto out; 411 } 412 413 if (ifindex && vrf_configure_cgroup(path, ifindex)) 414 goto out; 415 416 /* 417 * write pid to cgroup.procs making process part of cgroup 418 */ 419 strcat(path, CGRP_PROC_FILE); 420 fd = open(path, O_RDWR | O_APPEND); 421 if (fd < 0) { 422 fprintf(stderr, "Failed to open cgroups.procs file: %s.\n", 423 strerror(errno)); 424 goto out; 425 } 426 427 snprintf(pid, sizeof(pid), "%d", getpid()); 428 if (write(fd, pid, strlen(pid)) < 0) { 429 fprintf(stderr, "Failed to join cgroup\n"); 430 goto out2; 431 } 432 433 rc = 0; 434 out2: 435 close(fd); 436 out: 437 free(mnt); 438 439 return rc; 440 } 441 442 static int ipvrf_exec(int argc, char **argv) 443 { 444 if (argc < 1) { 445 fprintf(stderr, "No VRF name specified\n"); 446 return -1; 447 } 448 if (argc < 2) { 449 fprintf(stderr, "No command specified\n"); 450 return -1; 451 } 452 453 if (vrf_switch(argv[0])) 454 return -1; 455 456 return -cmd_exec(argv[1], argv + 1, !!batch_mode); 457 } 458 459 /* reset VRF association of current process to default VRF; 460 * used by netns_exec 461 */ 462 void vrf_reset(void) 463 { 464 char vrf[32]; 465 466 if (vrf_identify(getpid(), vrf, sizeof(vrf)) || 467 (vrf[0] == '\0')) 468 return; 469 470 vrf_switch("default"); 471 } 472 473 static int ipvrf_filter_req(struct nlmsghdr *nlh, int reqlen) 474 { 475 struct rtattr *linkinfo; 476 int err; 477 478 if (vrf_filter.kind) { 479 linkinfo = addattr_nest(nlh, reqlen, IFLA_LINKINFO); 480 481 err = addattr_l(nlh, reqlen, IFLA_INFO_KIND, vrf_filter.kind, 482 strlen(vrf_filter.kind)); 483 if (err) 484 return err; 485 486 addattr_nest_end(nlh, linkinfo); 487 } 488 489 return 0; 490 } 491 492 /* input arg is linkinfo */ 493 static __u32 vrf_table_linkinfo(struct rtattr *li[]) 494 { 495 struct rtattr *attr[IFLA_VRF_MAX + 1]; 496 497 if (li[IFLA_INFO_DATA]) { 498 parse_rtattr_nested(attr, IFLA_VRF_MAX, li[IFLA_INFO_DATA]); 499 500 if (attr[IFLA_VRF_TABLE]) 501 return rta_getattr_u32(attr[IFLA_VRF_TABLE]); 502 } 503 504 return 0; 505 } 506 507 static int ipvrf_print(struct nlmsghdr *n) 508 { 509 struct ifinfomsg *ifi = NLMSG_DATA(n); 510 struct rtattr *tb[IFLA_MAX+1]; 511 struct rtattr *li[IFLA_INFO_MAX+1]; 512 int len = n->nlmsg_len; 513 const char *name; 514 __u32 tb_id; 515 516 len -= NLMSG_LENGTH(sizeof(*ifi)); 517 if (len < 0) 518 return 0; 519 520 if (vrf_filter.ifindex && vrf_filter.ifindex != ifi->ifi_index) 521 return 0; 522 523 parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len); 524 525 /* kernel does not support filter by master device */ 526 if (tb[IFLA_MASTER]) { 527 int master = *(int *)RTA_DATA(tb[IFLA_MASTER]); 528 529 if (vrf_filter.master && master != vrf_filter.master) 530 return 0; 531 } 532 533 if (!tb[IFLA_IFNAME]) { 534 fprintf(stderr, 535 "BUG: device with ifindex %d has nil ifname\n", 536 ifi->ifi_index); 537 return 0; 538 } 539 name = rta_getattr_str(tb[IFLA_IFNAME]); 540 541 /* missing LINKINFO means not VRF. e.g., kernel does not 542 * support filtering on kind, so userspace needs to handle 543 */ 544 if (!tb[IFLA_LINKINFO]) 545 return 0; 546 547 parse_rtattr_nested(li, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); 548 549 if (!li[IFLA_INFO_KIND]) 550 return 0; 551 552 if (strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf")) 553 return 0; 554 555 tb_id = vrf_table_linkinfo(li); 556 if (!tb_id) { 557 fprintf(stderr, 558 "BUG: VRF %s is missing table id\n", name); 559 return 0; 560 } 561 562 printf("%-16s %5u", name, tb_id); 563 564 printf("\n"); 565 return 1; 566 } 567 568 static int ipvrf_show(int argc, char **argv) 569 { 570 struct nlmsg_chain linfo = { NULL, NULL}; 571 int rc = 0; 572 573 vrf_filter.kind = "vrf"; 574 575 if (argc > 1) 576 usage(); 577 578 if (argc == 1) { 579 __u32 tb_id; 580 581 tb_id = ipvrf_get_table(argv[0]); 582 if (!tb_id) { 583 fprintf(stderr, "Invalid VRF\n"); 584 return 1; 585 } 586 printf("%s %u\n", argv[0], tb_id); 587 return 0; 588 } 589 590 if (ip_linkaddr_list(0, ipvrf_filter_req, &linfo, NULL) == 0) { 591 struct nlmsg_list *l; 592 unsigned nvrf = 0; 593 int n; 594 595 n = printf("%-16s %5s\n", "Name", "Table"); 596 printf("%.*s\n", n-1, "-----------------------"); 597 for (l = linfo.head; l; l = l->next) 598 nvrf += ipvrf_print(&l->h); 599 600 if (!nvrf) 601 printf("No VRF has been configured\n"); 602 } else 603 rc = 1; 604 605 free_nlmsg_chain(&linfo); 606 607 return rc; 608 } 609 610 int do_ipvrf(int argc, char **argv) 611 { 612 if (argc == 0) 613 return ipvrf_show(0, NULL); 614 615 if (matches(*argv, "identify") == 0) 616 return ipvrf_identify(argc-1, argv+1); 617 618 if (matches(*argv, "pids") == 0) 619 return ipvrf_pids(argc-1, argv+1); 620 621 if (matches(*argv, "exec") == 0) 622 return ipvrf_exec(argc-1, argv+1); 623 624 if (matches(*argv, "show") == 0 || 625 matches(*argv, "lst") == 0 || 626 matches(*argv, "list") == 0) 627 return ipvrf_show(argc-1, argv+1); 628 629 if (matches(*argv, "help") == 0) 630 usage(); 631 632 fprintf(stderr, "Command \"%s\" is unknown, try \"ip vrf help\".\n", 633 *argv); 634 635 exit(-1); 636 } 637