1 /* 2 * tc_bpf.c BPF common code 3 * 4 * This program is free software; you can distribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Daniel Borkmann <dborkman (at) redhat.com> 10 * Jiri Pirko <jiri (at) resnulli.us> 11 * Alexei Starovoitov <ast (at) plumgrid.com> 12 */ 13 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <unistd.h> 17 #include <string.h> 18 #include <stdbool.h> 19 #include <stdint.h> 20 #include <errno.h> 21 #include <fcntl.h> 22 #include <stdarg.h> 23 24 #ifdef HAVE_ELF 25 #include <libelf.h> 26 #include <gelf.h> 27 #endif 28 29 #include <sys/types.h> 30 #include <sys/stat.h> 31 #include <sys/un.h> 32 #include <sys/vfs.h> 33 #include <sys/mount.h> 34 #include <sys/syscall.h> 35 #include <sys/sendfile.h> 36 #include <sys/resource.h> 37 38 #include <linux/bpf.h> 39 #include <linux/filter.h> 40 #include <linux/if_alg.h> 41 42 #include <arpa/inet.h> 43 44 #include "utils.h" 45 46 #include "bpf_elf.h" 47 #include "bpf_scm.h" 48 49 #include "tc_util.h" 50 #include "tc_bpf.h" 51 52 #ifdef HAVE_ELF 53 static int bpf_obj_open(const char *path, enum bpf_prog_type type, 54 const char *sec, bool verbose); 55 #else 56 static int bpf_obj_open(const char *path, enum bpf_prog_type type, 57 const char *sec, bool verbose) 58 { 59 fprintf(stderr, "No ELF library support compiled in.\n"); 60 errno = ENOSYS; 61 return -1; 62 } 63 #endif 64 65 static inline __u64 bpf_ptr_to_u64(const void *ptr) 66 { 67 return (__u64)(unsigned long)ptr; 68 } 69 70 static int bpf(int cmd, union bpf_attr *attr, unsigned int size) 71 { 72 #ifdef __NR_bpf 73 return syscall(__NR_bpf, cmd, attr, size); 74 #else 75 fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); 76 errno = ENOSYS; 77 return -1; 78 #endif 79 } 80 81 static int bpf_map_update(int fd, const void *key, const void *value, 82 uint64_t flags) 83 { 84 union bpf_attr attr = { 85 .map_fd = fd, 86 .key = bpf_ptr_to_u64(key), 87 .value = bpf_ptr_to_u64(value), 88 .flags = flags, 89 }; 90 91 return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); 92 } 93 94 static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, 95 char **bpf_string, bool *need_release, 96 const char separator) 97 { 98 char sp; 99 100 if (from_file) { 101 size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); 102 char *tmp_string; 103 FILE *fp; 104 105 tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; 106 tmp_string = malloc(tmp_len); 107 if (tmp_string == NULL) 108 return -ENOMEM; 109 110 memset(tmp_string, 0, tmp_len); 111 112 fp = fopen(arg, "r"); 113 if (fp == NULL) { 114 perror("Cannot fopen"); 115 free(tmp_string); 116 return -ENOENT; 117 } 118 119 if (!fgets(tmp_string, tmp_len, fp)) { 120 free(tmp_string); 121 fclose(fp); 122 return -EIO; 123 } 124 125 fclose(fp); 126 127 *need_release = true; 128 *bpf_string = tmp_string; 129 } else { 130 *need_release = false; 131 *bpf_string = arg; 132 } 133 134 if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || 135 sp != separator) { 136 if (*need_release) 137 free(*bpf_string); 138 return -EINVAL; 139 } 140 141 return 0; 142 } 143 144 static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops, 145 bool from_file) 146 { 147 char *bpf_string, *token, separator = ','; 148 int ret = 0, i = 0; 149 bool need_release; 150 __u16 bpf_len = 0; 151 152 if (argc < 1) 153 return -EINVAL; 154 if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, 155 &need_release, separator)) 156 return -EINVAL; 157 if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { 158 ret = -EINVAL; 159 goto out; 160 } 161 162 token = bpf_string; 163 while ((token = strchr(token, separator)) && (++token)[0]) { 164 if (i >= bpf_len) { 165 fprintf(stderr, "Real program length exceeds encoded " 166 "length parameter!\n"); 167 ret = -EINVAL; 168 goto out; 169 } 170 171 if (sscanf(token, "%hu %hhu %hhu %u,", 172 &bpf_ops[i].code, &bpf_ops[i].jt, 173 &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { 174 fprintf(stderr, "Error at instruction %d!\n", i); 175 ret = -EINVAL; 176 goto out; 177 } 178 179 i++; 180 } 181 182 if (i != bpf_len) { 183 fprintf(stderr, "Parsed program length is less than encoded" 184 "length parameter!\n"); 185 ret = -EINVAL; 186 goto out; 187 } 188 ret = bpf_len; 189 out: 190 if (need_release) 191 free(bpf_string); 192 193 return ret; 194 } 195 196 void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) 197 { 198 struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); 199 int i; 200 201 if (len == 0) 202 return; 203 204 fprintf(f, "bytecode \'%u,", len); 205 206 for (i = 0; i < len - 1; i++) 207 fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, 208 ops[i].jf, ops[i].k); 209 210 fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt, 211 ops[i].jf, ops[i].k); 212 } 213 214 static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, 215 int length) 216 { 217 char file[PATH_MAX], buff[4096]; 218 struct bpf_elf_map tmp, zero; 219 unsigned int val; 220 FILE *fp; 221 222 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 223 224 fp = fopen(file, "r"); 225 if (!fp) { 226 fprintf(stderr, "No procfs support?!\n"); 227 return -EIO; 228 } 229 230 memset(&tmp, 0, sizeof(tmp)); 231 while (fgets(buff, sizeof(buff), fp)) { 232 if (sscanf(buff, "map_type:\t%u", &val) == 1) 233 tmp.type = val; 234 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 235 tmp.size_key = val; 236 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 237 tmp.size_value = val; 238 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 239 tmp.max_elem = val; 240 } 241 242 fclose(fp); 243 244 if (!memcmp(&tmp, map, length)) { 245 return 0; 246 } else { 247 memset(&zero, 0, sizeof(zero)); 248 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much, 249 * so just accept it. We know we do have an eBPF fd and in this 250 * case, everything is 0. It is guaranteed that no such map exists 251 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC. 252 */ 253 if (!memcmp(&tmp, &zero, length)) 254 return 0; 255 256 fprintf(stderr, "Map specs from pinned file differ!\n"); 257 return -EINVAL; 258 } 259 } 260 261 static int bpf_mnt_fs(const char *target) 262 { 263 bool bind_done = false; 264 265 while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) { 266 if (errno != EINVAL || bind_done) { 267 fprintf(stderr, "mount --make-private %s failed: %s\n", 268 target, strerror(errno)); 269 return -1; 270 } 271 272 if (mount(target, target, "none", MS_BIND, NULL)) { 273 fprintf(stderr, "mount --bind %s %s failed: %s\n", 274 target, target, strerror(errno)); 275 return -1; 276 } 277 278 bind_done = true; 279 } 280 281 if (mount("bpf", target, "bpf", 0, NULL)) { 282 fprintf(stderr, "mount -t bpf bpf %s failed: %s\n", 283 target, strerror(errno)); 284 return -1; 285 } 286 287 return 0; 288 } 289 290 static int bpf_valid_mntpt(const char *mnt, unsigned long magic) 291 { 292 struct statfs st_fs; 293 294 if (statfs(mnt, &st_fs) < 0) 295 return -ENOENT; 296 if ((unsigned long)st_fs.f_type != magic) 297 return -ENOENT; 298 299 return 0; 300 } 301 302 static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, 303 char *mnt, int len, 304 const char * const *known_mnts) 305 { 306 const char * const *ptr; 307 char type[100]; 308 FILE *fp; 309 310 if (known_mnts) { 311 ptr = known_mnts; 312 while (*ptr) { 313 if (bpf_valid_mntpt(*ptr, magic) == 0) { 314 strncpy(mnt, *ptr, len - 1); 315 mnt[len - 1] = 0; 316 return mnt; 317 } 318 ptr++; 319 } 320 } 321 322 fp = fopen("/proc/mounts", "r"); 323 if (fp == NULL || len != PATH_MAX) 324 return NULL; 325 326 while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", 327 mnt, type) == 2) { 328 if (strcmp(type, fstype) == 0) 329 break; 330 } 331 332 fclose(fp); 333 if (strcmp(type, fstype) != 0) 334 return NULL; 335 336 return mnt; 337 } 338 339 int bpf_trace_pipe(void) 340 { 341 char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT; 342 static const char * const tracefs_known_mnts[] = { 343 TRACE_DIR_MNT, 344 "/sys/kernel/debug/tracing", 345 "/tracing", 346 "/trace", 347 0, 348 }; 349 char tpipe[PATH_MAX]; 350 const char *mnt; 351 int fd; 352 353 mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt, 354 sizeof(tracefs_mnt), tracefs_known_mnts); 355 if (!mnt) { 356 fprintf(stderr, "tracefs not mounted?\n"); 357 return -1; 358 } 359 360 snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); 361 362 fd = open(tpipe, O_RDONLY); 363 if (fd < 0) 364 return -1; 365 366 fprintf(stderr, "Running! Hang up with ^C!\n\n"); 367 while (1) { 368 static char buff[4096]; 369 ssize_t ret; 370 371 ret = read(fd, buff, sizeof(buff) - 1); 372 if (ret > 0) { 373 write(2, buff, ret); 374 fflush(stderr); 375 } 376 } 377 378 return 0; 379 } 380 381 static const char *bpf_get_tc_dir(void) 382 { 383 static bool bpf_mnt_cached = false; 384 static char bpf_tc_dir[PATH_MAX]; 385 static const char *mnt; 386 static const char * const bpf_known_mnts[] = { 387 BPF_DIR_MNT, 388 0, 389 }; 390 char bpf_mnt[PATH_MAX] = BPF_DIR_MNT; 391 char bpf_glo_dir[PATH_MAX]; 392 int ret; 393 394 if (bpf_mnt_cached) 395 goto done; 396 397 mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt), 398 bpf_known_mnts); 399 if (!mnt) { 400 mnt = getenv(BPF_ENV_MNT); 401 if (!mnt) 402 mnt = BPF_DIR_MNT; 403 ret = bpf_mnt_fs(mnt); 404 if (ret) { 405 mnt = NULL; 406 goto out; 407 } 408 } 409 410 snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC); 411 ret = mkdir(bpf_tc_dir, S_IRWXU); 412 if (ret && errno != EEXIST) { 413 fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir, 414 strerror(errno)); 415 mnt = NULL; 416 goto out; 417 } 418 419 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s", 420 bpf_tc_dir, BPF_DIR_GLOBALS); 421 ret = mkdir(bpf_glo_dir, S_IRWXU); 422 if (ret && errno != EEXIST) { 423 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir, 424 strerror(errno)); 425 mnt = NULL; 426 goto out; 427 } 428 429 mnt = bpf_tc_dir; 430 out: 431 bpf_mnt_cached = true; 432 done: 433 return mnt; 434 } 435 436 static int bpf_obj_get(const char *pathname) 437 { 438 union bpf_attr attr; 439 char tmp[PATH_MAX]; 440 441 if (strlen(pathname) > 2 && pathname[0] == 'm' && 442 pathname[1] == ':' && bpf_get_tc_dir()) { 443 snprintf(tmp, sizeof(tmp), "%s/%s", 444 bpf_get_tc_dir(), pathname + 2); 445 pathname = tmp; 446 } 447 448 memset(&attr, 0, sizeof(attr)); 449 attr.pathname = bpf_ptr_to_u64(pathname); 450 451 return bpf(BPF_OBJ_GET, &attr, sizeof(attr)); 452 } 453 454 const char *bpf_default_section(const enum bpf_prog_type type) 455 { 456 switch (type) { 457 case BPF_PROG_TYPE_SCHED_CLS: 458 return ELF_SECTION_CLASSIFIER; 459 case BPF_PROG_TYPE_SCHED_ACT: 460 return ELF_SECTION_ACTION; 461 default: 462 return NULL; 463 } 464 } 465 466 enum bpf_mode { 467 CBPF_BYTECODE = 0, 468 CBPF_FILE, 469 EBPF_OBJECT, 470 EBPF_PINNED, 471 __BPF_MODE_MAX, 472 #define BPF_MODE_MAX __BPF_MODE_MAX 473 }; 474 475 static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl, 476 enum bpf_prog_type *type, enum bpf_mode *mode, 477 const char **ptr_object, const char **ptr_section, 478 const char **ptr_uds_name, struct sock_filter *opcodes) 479 { 480 const char *file, *section, *uds_name; 481 bool verbose = false; 482 int ret, argc; 483 char **argv; 484 485 argv = *ptr_argv; 486 argc = *ptr_argc; 487 488 if (opt_tbl[CBPF_BYTECODE] && 489 (matches(*argv, "bytecode") == 0 || 490 strcmp(*argv, "bc") == 0)) { 491 *mode = CBPF_BYTECODE; 492 } else if (opt_tbl[CBPF_FILE] && 493 (matches(*argv, "bytecode-file") == 0 || 494 strcmp(*argv, "bcf") == 0)) { 495 *mode = CBPF_FILE; 496 } else if (opt_tbl[EBPF_OBJECT] && 497 (matches(*argv, "object-file") == 0 || 498 strcmp(*argv, "obj") == 0)) { 499 *mode = EBPF_OBJECT; 500 } else if (opt_tbl[EBPF_PINNED] && 501 (matches(*argv, "object-pinned") == 0 || 502 matches(*argv, "pinned") == 0 || 503 matches(*argv, "fd") == 0)) { 504 *mode = EBPF_PINNED; 505 } else { 506 fprintf(stderr, "What mode is \"%s\"?\n", *argv); 507 return -1; 508 } 509 510 NEXT_ARG(); 511 file = section = uds_name = NULL; 512 if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) { 513 file = *argv; 514 NEXT_ARG_FWD(); 515 516 if (*type == BPF_PROG_TYPE_UNSPEC) { 517 if (argc > 0 && matches(*argv, "type") == 0) { 518 NEXT_ARG(); 519 if (matches(*argv, "cls") == 0) { 520 *type = BPF_PROG_TYPE_SCHED_CLS; 521 } else if (matches(*argv, "act") == 0) { 522 *type = BPF_PROG_TYPE_SCHED_ACT; 523 } else { 524 fprintf(stderr, "What type is \"%s\"?\n", 525 *argv); 526 return -1; 527 } 528 NEXT_ARG_FWD(); 529 } else { 530 *type = BPF_PROG_TYPE_SCHED_CLS; 531 } 532 } 533 534 section = bpf_default_section(*type); 535 if (argc > 0 && matches(*argv, "section") == 0) { 536 NEXT_ARG(); 537 section = *argv; 538 NEXT_ARG_FWD(); 539 } 540 541 uds_name = getenv(BPF_ENV_UDS); 542 if (argc > 0 && !uds_name && 543 matches(*argv, "export") == 0) { 544 NEXT_ARG(); 545 uds_name = *argv; 546 NEXT_ARG_FWD(); 547 } 548 549 if (argc > 0 && matches(*argv, "verbose") == 0) { 550 verbose = true; 551 NEXT_ARG_FWD(); 552 } 553 554 PREV_ARG(); 555 } 556 557 if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE) 558 ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE); 559 else if (*mode == EBPF_OBJECT) 560 ret = bpf_obj_open(file, *type, section, verbose); 561 else if (*mode == EBPF_PINNED) 562 ret = bpf_obj_get(file); 563 else 564 return -1; 565 566 if (ptr_object) 567 *ptr_object = file; 568 if (ptr_section) 569 *ptr_section = section; 570 if (ptr_uds_name) 571 *ptr_uds_name = uds_name; 572 573 *ptr_argc = argc; 574 *ptr_argv = argv; 575 576 return ret; 577 } 578 579 int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl, 580 enum bpf_prog_type type, const char **ptr_object, 581 const char **ptr_uds_name, struct nlmsghdr *n) 582 { 583 struct sock_filter opcodes[BPF_MAXINSNS]; 584 const bool opt_tbl[BPF_MODE_MAX] = { 585 [CBPF_BYTECODE] = true, 586 [CBPF_FILE] = true, 587 [EBPF_OBJECT] = true, 588 [EBPF_PINNED] = true, 589 }; 590 char annotation[256]; 591 const char *section; 592 enum bpf_mode mode; 593 int ret; 594 595 ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode, 596 ptr_object, §ion, ptr_uds_name, opcodes); 597 if (ret < 0) 598 return ret; 599 600 if (mode == CBPF_BYTECODE || mode == CBPF_FILE) { 601 addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret); 602 addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes, 603 ret * sizeof(struct sock_filter)); 604 } 605 606 if (mode == EBPF_OBJECT || mode == EBPF_PINNED) { 607 snprintf(annotation, sizeof(annotation), "%s:[%s]", 608 basename(*ptr_object), mode == EBPF_PINNED ? 609 "*fsobj" : section); 610 611 addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret); 612 addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation); 613 } 614 615 return 0; 616 } 617 618 int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv) 619 { 620 enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC; 621 const bool opt_tbl[BPF_MODE_MAX] = { 622 [CBPF_BYTECODE] = false, 623 [CBPF_FILE] = false, 624 [EBPF_OBJECT] = true, 625 [EBPF_PINNED] = true, 626 }; 627 const struct bpf_elf_map test = { 628 .type = BPF_MAP_TYPE_PROG_ARRAY, 629 .size_key = sizeof(int), 630 .size_value = sizeof(int), 631 }; 632 int ret, prog_fd, map_fd; 633 const char *section; 634 enum bpf_mode mode; 635 uint32_t map_key; 636 637 prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode, 638 NULL, §ion, NULL, NULL); 639 if (prog_fd < 0) 640 return prog_fd; 641 if (key) { 642 map_key = *key; 643 } else { 644 ret = sscanf(section, "%*i/%i", &map_key); 645 if (ret != 1) { 646 fprintf(stderr, "Couldn\'t infer map key from section " 647 "name! Please provide \'key\' argument!\n"); 648 ret = -EINVAL; 649 goto out_prog; 650 } 651 } 652 653 map_fd = bpf_obj_get(map_path); 654 if (map_fd < 0) { 655 fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n", 656 map_path, strerror(errno)); 657 ret = map_fd; 658 goto out_prog; 659 } 660 661 ret = bpf_map_selfcheck_pinned(map_fd, &test, 662 offsetof(struct bpf_elf_map, max_elem)); 663 if (ret < 0) { 664 fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path); 665 goto out_map; 666 } 667 668 ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY); 669 if (ret < 0) 670 fprintf(stderr, "Map update failed: %s\n", strerror(errno)); 671 out_map: 672 close(map_fd); 673 out_prog: 674 close(prog_fd); 675 return ret; 676 } 677 678 #ifdef HAVE_ELF 679 struct bpf_elf_prog { 680 enum bpf_prog_type type; 681 const struct bpf_insn *insns; 682 size_t size; 683 const char *license; 684 }; 685 686 struct bpf_hash_entry { 687 unsigned int pinning; 688 const char *subpath; 689 struct bpf_hash_entry *next; 690 }; 691 692 struct bpf_elf_ctx { 693 Elf *elf_fd; 694 GElf_Ehdr elf_hdr; 695 Elf_Data *sym_tab; 696 Elf_Data *str_tab; 697 int obj_fd; 698 int map_fds[ELF_MAX_MAPS]; 699 struct bpf_elf_map maps[ELF_MAX_MAPS]; 700 int sym_num; 701 int map_num; 702 bool *sec_done; 703 int sec_maps; 704 char license[ELF_MAX_LICENSE_LEN]; 705 enum bpf_prog_type type; 706 bool verbose; 707 struct bpf_elf_st stat; 708 struct bpf_hash_entry *ht[256]; 709 }; 710 711 struct bpf_elf_sec_data { 712 GElf_Shdr sec_hdr; 713 Elf_Data *sec_data; 714 const char *sec_name; 715 }; 716 717 struct bpf_map_data { 718 int *fds; 719 const char *obj; 720 struct bpf_elf_st *st; 721 struct bpf_elf_map *ent; 722 }; 723 724 /* If we provide a small buffer with log level enabled, the kernel 725 * could fail program load as no buffer space is available for the 726 * log and thus verifier fails. In case something doesn't pass the 727 * verifier we still want to hand something descriptive to the user. 728 */ 729 static char bpf_log_buf[65536]; 730 731 static __check_format_string(1, 2) void bpf_dump_error(const char *format, ...) 732 { 733 va_list vl; 734 735 va_start(vl, format); 736 vfprintf(stderr, format, vl); 737 va_end(vl); 738 739 if (bpf_log_buf[0]) { 740 fprintf(stderr, "%s\n", bpf_log_buf); 741 memset(bpf_log_buf, 0, sizeof(bpf_log_buf)); 742 } 743 } 744 745 static int bpf_map_create(enum bpf_map_type type, unsigned int size_key, 746 unsigned int size_value, unsigned int max_elem) 747 { 748 union bpf_attr attr = { 749 .map_type = type, 750 .key_size = size_key, 751 .value_size = size_value, 752 .max_entries = max_elem, 753 }; 754 755 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 756 } 757 758 static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, 759 size_t size, const char *license) 760 { 761 union bpf_attr attr = { 762 .prog_type = type, 763 .insns = bpf_ptr_to_u64(insns), 764 .insn_cnt = size / sizeof(struct bpf_insn), 765 .license = bpf_ptr_to_u64(license), 766 .log_buf = bpf_ptr_to_u64(bpf_log_buf), 767 .log_size = sizeof(bpf_log_buf), 768 .log_level = 1, 769 }; 770 771 if (getenv(BPF_ENV_NOLOG)) { 772 attr.log_buf = 0; 773 attr.log_size = 0; 774 attr.log_level = 0; 775 } 776 777 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 778 } 779 780 static int bpf_obj_pin(int fd, const char *pathname) 781 { 782 union bpf_attr attr = { 783 .pathname = bpf_ptr_to_u64(pathname), 784 .bpf_fd = fd, 785 }; 786 787 return bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); 788 } 789 790 static int bpf_obj_hash(const char *object, uint8_t *out, size_t len) 791 { 792 struct sockaddr_alg alg = { 793 .salg_family = AF_ALG, 794 .salg_type = "hash", 795 .salg_name = "sha1", 796 }; 797 int ret, cfd, ofd, ffd; 798 struct stat stbuff; 799 ssize_t size; 800 801 if (!object || len != 20) 802 return -EINVAL; 803 804 cfd = socket(AF_ALG, SOCK_SEQPACKET, 0); 805 if (cfd < 0) { 806 fprintf(stderr, "Cannot get AF_ALG socket: %s\n", 807 strerror(errno)); 808 return cfd; 809 } 810 811 ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg)); 812 if (ret < 0) { 813 fprintf(stderr, "Error binding socket: %s\n", strerror(errno)); 814 goto out_cfd; 815 } 816 817 ofd = accept(cfd, NULL, 0); 818 if (ofd < 0) { 819 fprintf(stderr, "Error accepting socket: %s\n", 820 strerror(errno)); 821 ret = ofd; 822 goto out_cfd; 823 } 824 825 ffd = open(object, O_RDONLY); 826 if (ffd < 0) { 827 fprintf(stderr, "Error opening object %s: %s\n", 828 object, strerror(errno)); 829 ret = ffd; 830 goto out_ofd; 831 } 832 833 ret = fstat(ffd, &stbuff); 834 if (ret < 0) { 835 fprintf(stderr, "Error doing fstat: %s\n", 836 strerror(errno)); 837 goto out_ffd; 838 } 839 840 size = sendfile(ofd, ffd, NULL, stbuff.st_size); 841 if (size != stbuff.st_size) { 842 fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n", 843 size, stbuff.st_size, strerror(errno)); 844 ret = -1; 845 goto out_ffd; 846 } 847 848 size = read(ofd, out, len); 849 if (size != len) { 850 fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n", 851 size, len, strerror(errno)); 852 ret = -1; 853 } else { 854 ret = 0; 855 } 856 out_ffd: 857 close(ffd); 858 out_ofd: 859 close(ofd); 860 out_cfd: 861 close(cfd); 862 return ret; 863 } 864 865 static const char *bpf_get_obj_uid(const char *pathname) 866 { 867 static bool bpf_uid_cached = false; 868 static char bpf_uid[64]; 869 uint8_t tmp[20]; 870 int ret; 871 872 if (bpf_uid_cached) 873 goto done; 874 875 ret = bpf_obj_hash(pathname, tmp, sizeof(tmp)); 876 if (ret) { 877 fprintf(stderr, "Object hashing failed!\n"); 878 return NULL; 879 } 880 881 hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid)); 882 bpf_uid_cached = true; 883 done: 884 return bpf_uid; 885 } 886 887 static int bpf_init_env(const char *pathname) 888 { 889 struct rlimit limit = { 890 .rlim_cur = RLIM_INFINITY, 891 .rlim_max = RLIM_INFINITY, 892 }; 893 894 /* Don't bother in case we fail! */ 895 setrlimit(RLIMIT_MEMLOCK, &limit); 896 897 if (!bpf_get_tc_dir()) { 898 fprintf(stderr, "Continuing without mounted eBPF fs. " 899 "Too old kernel?\n"); 900 return 0; 901 } 902 903 if (!bpf_get_obj_uid(pathname)) 904 return -1; 905 906 return 0; 907 } 908 909 static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx, 910 uint32_t pinning) 911 { 912 struct bpf_hash_entry *entry; 913 914 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; 915 while (entry && entry->pinning != pinning) 916 entry = entry->next; 917 918 return entry ? entry->subpath : NULL; 919 } 920 921 static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx, 922 uint32_t pinning) 923 { 924 switch (pinning) { 925 case PIN_OBJECT_NS: 926 case PIN_GLOBAL_NS: 927 return false; 928 case PIN_NONE: 929 return true; 930 default: 931 return !bpf_custom_pinning(ctx, pinning); 932 } 933 } 934 935 static void bpf_make_pathname(char *pathname, size_t len, const char *name, 936 const struct bpf_elf_ctx *ctx, uint32_t pinning) 937 { 938 switch (pinning) { 939 case PIN_OBJECT_NS: 940 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), 941 bpf_get_obj_uid(NULL), name); 942 break; 943 case PIN_GLOBAL_NS: 944 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), 945 BPF_DIR_GLOBALS, name); 946 break; 947 default: 948 snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(), 949 bpf_custom_pinning(ctx, pinning), name); 950 break; 951 } 952 } 953 954 static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx, 955 uint32_t pinning) 956 { 957 char pathname[PATH_MAX]; 958 959 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) 960 return 0; 961 962 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); 963 return bpf_obj_get(pathname); 964 } 965 966 static int bpf_make_obj_path(void) 967 { 968 char tmp[PATH_MAX]; 969 int ret; 970 971 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(), 972 bpf_get_obj_uid(NULL)); 973 974 ret = mkdir(tmp, S_IRWXU); 975 if (ret && errno != EEXIST) { 976 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); 977 return ret; 978 } 979 980 return 0; 981 } 982 983 static int bpf_make_custom_path(const char *todo) 984 { 985 char tmp[PATH_MAX], rem[PATH_MAX], *sub; 986 int ret; 987 988 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir()); 989 snprintf(rem, sizeof(rem), "%s/", todo); 990 sub = strtok(rem, "/"); 991 992 while (sub) { 993 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX) 994 return -EINVAL; 995 996 strcat(tmp, sub); 997 strcat(tmp, "/"); 998 999 ret = mkdir(tmp, S_IRWXU); 1000 if (ret && errno != EEXIST) { 1001 fprintf(stderr, "mkdir %s failed: %s\n", tmp, 1002 strerror(errno)); 1003 return ret; 1004 } 1005 1006 sub = strtok(NULL, "/"); 1007 } 1008 1009 return 0; 1010 } 1011 1012 static int bpf_place_pinned(int fd, const char *name, 1013 const struct bpf_elf_ctx *ctx, uint32_t pinning) 1014 { 1015 char pathname[PATH_MAX]; 1016 const char *tmp; 1017 int ret = 0; 1018 1019 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) 1020 return 0; 1021 1022 if (pinning == PIN_OBJECT_NS) 1023 ret = bpf_make_obj_path(); 1024 else if ((tmp = bpf_custom_pinning(ctx, pinning))) 1025 ret = bpf_make_custom_path(tmp); 1026 if (ret < 0) 1027 return ret; 1028 1029 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); 1030 return bpf_obj_pin(fd, pathname); 1031 } 1032 1033 static int bpf_prog_attach(const char *section, 1034 const struct bpf_elf_prog *prog, bool verbose) 1035 { 1036 int fd; 1037 1038 /* We can add pinning here later as well, same as bpf_map_attach(). */ 1039 errno = 0; 1040 fd = bpf_prog_load(prog->type, prog->insns, prog->size, 1041 prog->license); 1042 if (fd < 0 || verbose) { 1043 bpf_dump_error("Prog section \'%s\' (type:%u insns:%zu " 1044 "license:\'%s\') %s%s (%d)!\n\n", 1045 section, prog->type, 1046 prog->size / sizeof(struct bpf_insn), 1047 prog->license, fd < 0 ? "rejected: " : 1048 "loaded", fd < 0 ? strerror(errno) : "", 1049 fd < 0 ? errno : fd); 1050 } 1051 1052 return fd; 1053 } 1054 1055 static int bpf_map_attach(const char *name, const struct bpf_elf_map *map, 1056 const struct bpf_elf_ctx *ctx, bool verbose) 1057 { 1058 int fd, ret; 1059 1060 fd = bpf_probe_pinned(name, ctx, map->pinning); 1061 if (fd > 0) { 1062 ret = bpf_map_selfcheck_pinned(fd, map, 1063 offsetof(struct bpf_elf_map, 1064 id)); 1065 if (ret < 0) { 1066 close(fd); 1067 fprintf(stderr, "Map \'%s\' self-check failed!\n", 1068 name); 1069 return ret; 1070 } 1071 if (verbose) 1072 fprintf(stderr, "Map \'%s\' loaded as pinned!\n", 1073 name); 1074 return fd; 1075 } 1076 1077 errno = 0; 1078 fd = bpf_map_create(map->type, map->size_key, map->size_value, 1079 map->max_elem); 1080 if (fd < 0 || verbose) { 1081 bpf_dump_error("Map \'%s\' (type:%u id:%u pinning:%u " 1082 "ksize:%u vsize:%u max-elems:%u) %s%s (%d)!\n", 1083 name, map->type, map->id, map->pinning, 1084 map->size_key, map->size_value, map->max_elem, 1085 fd < 0 ? "rejected: " : "loaded", fd < 0 ? 1086 strerror(errno) : "", fd < 0 ? errno : fd); 1087 if (fd < 0) 1088 return fd; 1089 } 1090 1091 ret = bpf_place_pinned(fd, name, ctx, map->pinning); 1092 if (ret < 0 && errno != EEXIST) { 1093 fprintf(stderr, "Could not pin %s map: %s\n", name, 1094 strerror(errno)); 1095 close(fd); 1096 return ret; 1097 } 1098 1099 return fd; 1100 } 1101 1102 #define __ELF_ST_BIND(x) ((x) >> 4) 1103 #define __ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) 1104 1105 static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, 1106 const GElf_Sym *sym) 1107 { 1108 return ctx->str_tab->d_buf + sym->st_name; 1109 } 1110 1111 static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) 1112 { 1113 GElf_Sym sym; 1114 int i; 1115 1116 for (i = 0; i < ctx->sym_num; i++) { 1117 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) 1118 continue; 1119 1120 if (__ELF_ST_BIND(sym.st_info) != STB_GLOBAL || 1121 __ELF_ST_TYPE(sym.st_info) != STT_NOTYPE || 1122 sym.st_shndx != ctx->sec_maps || 1123 sym.st_value / sizeof(struct bpf_elf_map) != which) 1124 continue; 1125 1126 return bpf_str_tab_name(ctx, &sym); 1127 } 1128 1129 return NULL; 1130 } 1131 1132 static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx) 1133 { 1134 const char *map_name; 1135 int i, fd; 1136 1137 for (i = 0; i < ctx->map_num; i++) { 1138 map_name = bpf_map_fetch_name(ctx, i); 1139 if (!map_name) 1140 return -EIO; 1141 1142 fd = bpf_map_attach(map_name, &ctx->maps[i], ctx, 1143 ctx->verbose); 1144 if (fd < 0) 1145 return fd; 1146 1147 ctx->map_fds[i] = fd; 1148 } 1149 1150 return 0; 1151 } 1152 1153 static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section, 1154 struct bpf_elf_sec_data *data) 1155 { 1156 Elf_Data *sec_edata; 1157 GElf_Shdr sec_hdr; 1158 Elf_Scn *sec_fd; 1159 char *sec_name; 1160 1161 memset(data, 0, sizeof(*data)); 1162 1163 sec_fd = elf_getscn(ctx->elf_fd, section); 1164 if (!sec_fd) 1165 return -EINVAL; 1166 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) 1167 return -EIO; 1168 1169 sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx, 1170 sec_hdr.sh_name); 1171 if (!sec_name || !sec_hdr.sh_size) 1172 return -ENOENT; 1173 1174 sec_edata = elf_getdata(sec_fd, NULL); 1175 if (!sec_edata || elf_getdata(sec_fd, sec_edata)) 1176 return -EIO; 1177 1178 memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); 1179 1180 data->sec_name = sec_name; 1181 data->sec_data = sec_edata; 1182 return 0; 1183 } 1184 1185 static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section, 1186 struct bpf_elf_sec_data *data) 1187 { 1188 if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0) 1189 return -EINVAL; 1190 1191 ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map); 1192 ctx->sec_maps = section; 1193 ctx->sec_done[section] = true; 1194 1195 if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) { 1196 fprintf(stderr, "Too many BPF maps in ELF section!\n"); 1197 return -ENOMEM; 1198 } 1199 1200 memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size); 1201 return 0; 1202 } 1203 1204 static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section, 1205 struct bpf_elf_sec_data *data) 1206 { 1207 if (data->sec_data->d_size > sizeof(ctx->license)) 1208 return -ENOMEM; 1209 1210 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size); 1211 ctx->sec_done[section] = true; 1212 return 0; 1213 } 1214 1215 static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section, 1216 struct bpf_elf_sec_data *data) 1217 { 1218 ctx->sym_tab = data->sec_data; 1219 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize; 1220 ctx->sec_done[section] = true; 1221 return 0; 1222 } 1223 1224 static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, 1225 struct bpf_elf_sec_data *data) 1226 { 1227 ctx->str_tab = data->sec_data; 1228 ctx->sec_done[section] = true; 1229 return 0; 1230 } 1231 1232 static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) 1233 { 1234 struct bpf_elf_sec_data data; 1235 int i, ret = -1; 1236 1237 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1238 ret = bpf_fill_section_data(ctx, i, &data); 1239 if (ret < 0) 1240 continue; 1241 1242 if (data.sec_hdr.sh_type == SHT_PROGBITS && 1243 !strcmp(data.sec_name, ELF_SECTION_MAPS)) 1244 ret = bpf_fetch_maps(ctx, i, &data); 1245 else if (data.sec_hdr.sh_type == SHT_PROGBITS && 1246 !strcmp(data.sec_name, ELF_SECTION_LICENSE)) 1247 ret = bpf_fetch_license(ctx, i, &data); 1248 else if (data.sec_hdr.sh_type == SHT_SYMTAB && 1249 !strcmp(data.sec_name, ".symtab")) 1250 ret = bpf_fetch_symtab(ctx, i, &data); 1251 else if (data.sec_hdr.sh_type == SHT_STRTAB && 1252 !strcmp(data.sec_name, ".strtab")) 1253 ret = bpf_fetch_strtab(ctx, i, &data); 1254 if (ret < 0) { 1255 fprintf(stderr, "Error parsing section %d! Perhaps" 1256 "check with readelf -a?\n", i); 1257 break; 1258 } 1259 } 1260 1261 if (ctx->sym_tab && ctx->str_tab && ctx->sec_maps) { 1262 ret = bpf_maps_attach_all(ctx); 1263 if (ret < 0) { 1264 fprintf(stderr, "Error loading maps into kernel!\n"); 1265 return ret; 1266 } 1267 } 1268 1269 return ret; 1270 } 1271 1272 static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section) 1273 { 1274 struct bpf_elf_sec_data data; 1275 struct bpf_elf_prog prog; 1276 int ret, i, fd = -1; 1277 1278 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1279 if (ctx->sec_done[i]) 1280 continue; 1281 1282 ret = bpf_fill_section_data(ctx, i, &data); 1283 if (ret < 0 || 1284 !(data.sec_hdr.sh_type == SHT_PROGBITS && 1285 data.sec_hdr.sh_flags & SHF_EXECINSTR && 1286 !strcmp(data.sec_name, section))) 1287 continue; 1288 1289 memset(&prog, 0, sizeof(prog)); 1290 prog.type = ctx->type; 1291 prog.insns = data.sec_data->d_buf; 1292 prog.size = data.sec_data->d_size; 1293 prog.license = ctx->license; 1294 1295 fd = bpf_prog_attach(section, &prog, ctx->verbose); 1296 if (fd < 0) 1297 continue; 1298 1299 ctx->sec_done[i] = true; 1300 break; 1301 } 1302 1303 return fd; 1304 } 1305 1306 static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, 1307 struct bpf_elf_sec_data *data_relo, 1308 struct bpf_elf_sec_data *data_insn) 1309 { 1310 Elf_Data *idata = data_insn->sec_data; 1311 GElf_Shdr *rhdr = &data_relo->sec_hdr; 1312 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; 1313 struct bpf_insn *insns = idata->d_buf; 1314 unsigned int num_insns = idata->d_size / sizeof(*insns); 1315 1316 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { 1317 unsigned int ioff, rmap; 1318 GElf_Rel relo; 1319 GElf_Sym sym; 1320 1321 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) 1322 return -EIO; 1323 1324 ioff = relo.r_offset / sizeof(struct bpf_insn); 1325 if (ioff >= num_insns || 1326 insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) 1327 return -EINVAL; 1328 1329 if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) 1330 return -EIO; 1331 1332 rmap = sym.st_value / sizeof(struct bpf_elf_map); 1333 if (rmap >= ARRAY_SIZE(ctx->map_fds)) 1334 return -EINVAL; 1335 if (!ctx->map_fds[rmap]) 1336 return -EINVAL; 1337 1338 if (ctx->verbose) 1339 fprintf(stderr, "Map \'%s\' (%d) injected into prog " 1340 "section \'%s\' at offset %u!\n", 1341 bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap], 1342 data_insn->sec_name, ioff); 1343 1344 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD; 1345 insns[ioff].imm = ctx->map_fds[rmap]; 1346 } 1347 1348 return 0; 1349 } 1350 1351 static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section) 1352 { 1353 struct bpf_elf_sec_data data_relo, data_insn; 1354 struct bpf_elf_prog prog; 1355 int ret, idx, i, fd = -1; 1356 1357 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1358 ret = bpf_fill_section_data(ctx, i, &data_relo); 1359 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) 1360 continue; 1361 1362 idx = data_relo.sec_hdr.sh_info; 1363 ret = bpf_fill_section_data(ctx, idx, &data_insn); 1364 if (ret < 0 || 1365 !(data_insn.sec_hdr.sh_type == SHT_PROGBITS && 1366 data_insn.sec_hdr.sh_flags & SHF_EXECINSTR && 1367 !strcmp(data_insn.sec_name, section))) 1368 continue; 1369 1370 ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn); 1371 if (ret < 0) 1372 continue; 1373 1374 memset(&prog, 0, sizeof(prog)); 1375 prog.type = ctx->type; 1376 prog.insns = data_insn.sec_data->d_buf; 1377 prog.size = data_insn.sec_data->d_size; 1378 prog.license = ctx->license; 1379 1380 fd = bpf_prog_attach(section, &prog, ctx->verbose); 1381 if (fd < 0) 1382 continue; 1383 1384 ctx->sec_done[i] = true; 1385 ctx->sec_done[idx] = true; 1386 break; 1387 } 1388 1389 return fd; 1390 } 1391 1392 static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section) 1393 { 1394 int ret = -1; 1395 1396 if (ctx->sym_tab) 1397 ret = bpf_fetch_prog_relo(ctx, section); 1398 if (ret < 0) 1399 ret = bpf_fetch_prog(ctx, section); 1400 1401 return ret; 1402 } 1403 1404 static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id) 1405 { 1406 int i; 1407 1408 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) 1409 if (ctx->map_fds[i] && ctx->maps[i].id == id && 1410 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) 1411 return i; 1412 return -1; 1413 } 1414 1415 static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx) 1416 { 1417 struct bpf_elf_sec_data data; 1418 uint32_t map_id, key_id; 1419 int fd, i, ret, idx; 1420 1421 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1422 if (ctx->sec_done[i]) 1423 continue; 1424 1425 ret = bpf_fill_section_data(ctx, i, &data); 1426 if (ret < 0) 1427 continue; 1428 1429 ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id); 1430 if (ret != 2) 1431 continue; 1432 1433 idx = bpf_find_map_by_id(ctx, map_id); 1434 if (idx < 0) 1435 continue; 1436 1437 fd = bpf_fetch_prog_sec(ctx, data.sec_name); 1438 if (fd < 0) 1439 return -EIO; 1440 1441 ret = bpf_map_update(ctx->map_fds[idx], &key_id, 1442 &fd, BPF_ANY); 1443 if (ret < 0) 1444 return -ENOENT; 1445 1446 ctx->sec_done[i] = true; 1447 } 1448 1449 return 0; 1450 } 1451 1452 static void bpf_save_finfo(struct bpf_elf_ctx *ctx) 1453 { 1454 struct stat st; 1455 int ret; 1456 1457 memset(&ctx->stat, 0, sizeof(ctx->stat)); 1458 1459 ret = fstat(ctx->obj_fd, &st); 1460 if (ret < 0) { 1461 fprintf(stderr, "Stat of elf file failed: %s\n", 1462 strerror(errno)); 1463 return; 1464 } 1465 1466 ctx->stat.st_dev = st.st_dev; 1467 ctx->stat.st_ino = st.st_ino; 1468 } 1469 1470 static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path) 1471 { 1472 char buff[PATH_MAX]; 1473 1474 while (fgets(buff, sizeof(buff), fp)) { 1475 char *ptr = buff; 1476 1477 while (*ptr == ' ' || *ptr == '\t') 1478 ptr++; 1479 1480 if (*ptr == '#' || *ptr == '\n' || *ptr == 0) 1481 continue; 1482 1483 if (sscanf(ptr, "%i %s\n", id, path) != 2 && 1484 sscanf(ptr, "%i %s #", id, path) != 2) { 1485 strcpy(path, ptr); 1486 return -1; 1487 } 1488 1489 return 1; 1490 } 1491 1492 return 0; 1493 } 1494 1495 static bool bpf_pinning_reserved(uint32_t pinning) 1496 { 1497 switch (pinning) { 1498 case PIN_NONE: 1499 case PIN_OBJECT_NS: 1500 case PIN_GLOBAL_NS: 1501 return true; 1502 default: 1503 return false; 1504 } 1505 } 1506 1507 static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file) 1508 { 1509 struct bpf_hash_entry *entry; 1510 char subpath[PATH_MAX]; 1511 uint32_t pinning; 1512 FILE *fp; 1513 int ret; 1514 1515 fp = fopen(db_file, "r"); 1516 if (!fp) 1517 return; 1518 1519 memset(subpath, 0, sizeof(subpath)); 1520 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) { 1521 if (ret == -1) { 1522 fprintf(stderr, "Database %s is corrupted at: %s\n", 1523 db_file, subpath); 1524 fclose(fp); 1525 return; 1526 } 1527 1528 if (bpf_pinning_reserved(pinning)) { 1529 fprintf(stderr, "Database %s, id %u is reserved - " 1530 "ignoring!\n", db_file, pinning); 1531 continue; 1532 } 1533 1534 entry = malloc(sizeof(*entry)); 1535 if (!entry) { 1536 fprintf(stderr, "No memory left for db entry!\n"); 1537 continue; 1538 } 1539 1540 entry->pinning = pinning; 1541 entry->subpath = strdup(subpath); 1542 if (!entry->subpath) { 1543 fprintf(stderr, "No memory left for db entry!\n"); 1544 free(entry); 1545 continue; 1546 } 1547 1548 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; 1549 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry; 1550 } 1551 1552 fclose(fp); 1553 } 1554 1555 static void bpf_hash_destroy(struct bpf_elf_ctx *ctx) 1556 { 1557 struct bpf_hash_entry *entry; 1558 int i; 1559 1560 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) { 1561 while ((entry = ctx->ht[i]) != NULL) { 1562 ctx->ht[i] = entry->next; 1563 free((char *)entry->subpath); 1564 free(entry); 1565 } 1566 } 1567 } 1568 1569 static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx) 1570 { 1571 if (ctx->elf_hdr.e_type != ET_REL || 1572 ctx->elf_hdr.e_machine != 0 || 1573 ctx->elf_hdr.e_version != EV_CURRENT) { 1574 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n"); 1575 return -EINVAL; 1576 } 1577 1578 switch (ctx->elf_hdr.e_ident[EI_DATA]) { 1579 default: 1580 fprintf(stderr, "ELF format error, wrong endianness info?\n"); 1581 return -EINVAL; 1582 case ELFDATA2LSB: 1583 if (htons(1) == 1) { 1584 fprintf(stderr, 1585 "We are big endian, eBPF object is little endian!\n"); 1586 return -EIO; 1587 } 1588 break; 1589 case ELFDATA2MSB: 1590 if (htons(1) != 1) { 1591 fprintf(stderr, 1592 "We are little endian, eBPF object is big endian!\n"); 1593 return -EIO; 1594 } 1595 break; 1596 } 1597 1598 return 0; 1599 } 1600 1601 static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname, 1602 enum bpf_prog_type type, bool verbose) 1603 { 1604 int ret = -EINVAL; 1605 1606 if (elf_version(EV_CURRENT) == EV_NONE || 1607 bpf_init_env(pathname)) 1608 return ret; 1609 1610 memset(ctx, 0, sizeof(*ctx)); 1611 ctx->verbose = verbose; 1612 ctx->type = type; 1613 1614 ctx->obj_fd = open(pathname, O_RDONLY); 1615 if (ctx->obj_fd < 0) 1616 return ctx->obj_fd; 1617 1618 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL); 1619 if (!ctx->elf_fd) { 1620 ret = -EINVAL; 1621 goto out_fd; 1622 } 1623 1624 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) { 1625 ret = -EINVAL; 1626 goto out_fd; 1627 } 1628 1629 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) != 1630 &ctx->elf_hdr) { 1631 ret = -EIO; 1632 goto out_elf; 1633 } 1634 1635 ret = bpf_elf_check_ehdr(ctx); 1636 if (ret < 0) 1637 goto out_elf; 1638 1639 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum, 1640 sizeof(*(ctx->sec_done))); 1641 if (!ctx->sec_done) { 1642 ret = -ENOMEM; 1643 goto out_elf; 1644 } 1645 1646 bpf_save_finfo(ctx); 1647 bpf_hash_init(ctx, CONFDIR "/bpf_pinning"); 1648 1649 return 0; 1650 out_elf: 1651 elf_end(ctx->elf_fd); 1652 out_fd: 1653 close(ctx->obj_fd); 1654 return ret; 1655 } 1656 1657 static int bpf_maps_count(struct bpf_elf_ctx *ctx) 1658 { 1659 int i, count = 0; 1660 1661 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { 1662 if (!ctx->map_fds[i]) 1663 break; 1664 count++; 1665 } 1666 1667 return count; 1668 } 1669 1670 static void bpf_maps_teardown(struct bpf_elf_ctx *ctx) 1671 { 1672 int i; 1673 1674 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { 1675 if (ctx->map_fds[i]) 1676 close(ctx->map_fds[i]); 1677 } 1678 } 1679 1680 static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure) 1681 { 1682 if (failure) 1683 bpf_maps_teardown(ctx); 1684 1685 bpf_hash_destroy(ctx); 1686 free(ctx->sec_done); 1687 elf_end(ctx->elf_fd); 1688 close(ctx->obj_fd); 1689 } 1690 1691 static struct bpf_elf_ctx __ctx; 1692 1693 static int bpf_obj_open(const char *pathname, enum bpf_prog_type type, 1694 const char *section, bool verbose) 1695 { 1696 struct bpf_elf_ctx *ctx = &__ctx; 1697 int fd = 0, ret; 1698 1699 ret = bpf_elf_ctx_init(ctx, pathname, type, verbose); 1700 if (ret < 0) { 1701 fprintf(stderr, "Cannot initialize ELF context!\n"); 1702 return ret; 1703 } 1704 1705 ret = bpf_fetch_ancillary(ctx); 1706 if (ret < 0) { 1707 fprintf(stderr, "Error fetching ELF ancillary data!\n"); 1708 goto out; 1709 } 1710 1711 fd = bpf_fetch_prog_sec(ctx, section); 1712 if (fd < 0) { 1713 fprintf(stderr, "Error fetching program/map!\n"); 1714 ret = fd; 1715 goto out; 1716 } 1717 1718 ret = bpf_fill_prog_arrays(ctx); 1719 if (ret < 0) 1720 fprintf(stderr, "Error filling program arrays!\n"); 1721 out: 1722 bpf_elf_ctx_destroy(ctx, ret < 0); 1723 if (ret < 0) { 1724 if (fd) 1725 close(fd); 1726 return ret; 1727 } 1728 1729 return fd; 1730 } 1731 1732 static int 1733 bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len, 1734 const struct bpf_map_data *aux, unsigned int entries) 1735 { 1736 struct bpf_map_set_msg msg; 1737 int *cmsg_buf, min_fd; 1738 char *amsg_buf; 1739 int i; 1740 1741 memset(&msg, 0, sizeof(msg)); 1742 1743 msg.aux.uds_ver = BPF_SCM_AUX_VER; 1744 msg.aux.num_ent = entries; 1745 1746 strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name)); 1747 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st)); 1748 1749 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len); 1750 amsg_buf = (char *)msg.aux.ent; 1751 1752 for (i = 0; i < entries; i += min_fd) { 1753 int ret; 1754 1755 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i); 1756 bpf_map_set_init_single(&msg, min_fd); 1757 1758 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd); 1759 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd); 1760 1761 ret = sendmsg(fd, &msg.hdr, 0); 1762 if (ret <= 0) 1763 return ret ? : -1; 1764 } 1765 1766 return 0; 1767 } 1768 1769 static int 1770 bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux, 1771 unsigned int entries) 1772 { 1773 struct bpf_map_set_msg msg; 1774 int *cmsg_buf, min_fd; 1775 char *amsg_buf, *mmsg_buf; 1776 unsigned int needed = 1; 1777 int i; 1778 1779 cmsg_buf = bpf_map_set_init(&msg, NULL, 0); 1780 amsg_buf = (char *)msg.aux.ent; 1781 mmsg_buf = (char *)&msg.aux; 1782 1783 for (i = 0; i < min(entries, needed); i += min_fd) { 1784 struct cmsghdr *cmsg; 1785 int ret; 1786 1787 min_fd = min(entries, entries - i); 1788 bpf_map_set_init_single(&msg, min_fd); 1789 1790 ret = recvmsg(fd, &msg.hdr, 0); 1791 if (ret <= 0) 1792 return ret ? : -1; 1793 1794 cmsg = CMSG_FIRSTHDR(&msg.hdr); 1795 if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS) 1796 return -EINVAL; 1797 if (msg.hdr.msg_flags & MSG_CTRUNC) 1798 return -EIO; 1799 if (msg.aux.uds_ver != BPF_SCM_AUX_VER) 1800 return -ENOSYS; 1801 1802 min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd); 1803 if (min_fd > entries || min_fd <= 0) 1804 return -EINVAL; 1805 1806 memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd); 1807 memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd); 1808 memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent)); 1809 1810 needed = aux->num_ent; 1811 } 1812 1813 return 0; 1814 } 1815 1816 int bpf_send_map_fds(const char *path, const char *obj) 1817 { 1818 struct bpf_elf_ctx *ctx = &__ctx; 1819 struct sockaddr_un addr; 1820 struct bpf_map_data bpf_aux; 1821 int fd, ret; 1822 1823 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 1824 if (fd < 0) { 1825 fprintf(stderr, "Cannot open socket: %s\n", 1826 strerror(errno)); 1827 return -1; 1828 } 1829 1830 memset(&addr, 0, sizeof(addr)); 1831 addr.sun_family = AF_UNIX; 1832 strncpy(addr.sun_path, path, sizeof(addr.sun_path)); 1833 1834 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr)); 1835 if (ret < 0) { 1836 fprintf(stderr, "Cannot connect to %s: %s\n", 1837 path, strerror(errno)); 1838 return -1; 1839 } 1840 1841 memset(&bpf_aux, 0, sizeof(bpf_aux)); 1842 1843 bpf_aux.fds = ctx->map_fds; 1844 bpf_aux.ent = ctx->maps; 1845 bpf_aux.st = &ctx->stat; 1846 bpf_aux.obj = obj; 1847 1848 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux, 1849 bpf_maps_count(ctx)); 1850 if (ret < 0) 1851 fprintf(stderr, "Cannot send fds to %s: %s\n", 1852 path, strerror(errno)); 1853 1854 bpf_maps_teardown(ctx); 1855 close(fd); 1856 return ret; 1857 } 1858 1859 int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, 1860 unsigned int entries) 1861 { 1862 struct sockaddr_un addr; 1863 int fd, ret; 1864 1865 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 1866 if (fd < 0) { 1867 fprintf(stderr, "Cannot open socket: %s\n", 1868 strerror(errno)); 1869 return -1; 1870 } 1871 1872 memset(&addr, 0, sizeof(addr)); 1873 addr.sun_family = AF_UNIX; 1874 strncpy(addr.sun_path, path, sizeof(addr.sun_path)); 1875 1876 ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); 1877 if (ret < 0) { 1878 fprintf(stderr, "Cannot bind to socket: %s\n", 1879 strerror(errno)); 1880 return -1; 1881 } 1882 1883 ret = bpf_map_set_recv(fd, fds, aux, entries); 1884 if (ret < 0) 1885 fprintf(stderr, "Cannot recv fds from %s: %s\n", 1886 path, strerror(errno)); 1887 1888 unlink(addr.sun_path); 1889 close(fd); 1890 return ret; 1891 } 1892 #endif /* HAVE_ELF */ 1893