/*
 * bpf.c	BPF common code
 *
 *		This program is free software; you can distribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Daniel Borkmann <daniel (at) iogearbox.net>
 *		Jiri Pirko <jiri (at) resnulli.us>
 *		Alexei Starovoitov <ast (at) kernel.org>
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <limits.h>
#include <assert.h>

#ifdef HAVE_ELF
#include <libelf.h>
#include <gelf.h>
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <sys/vfs.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/sendfile.h>
#include <sys/resource.h>

#include <arpa/inet.h>

#include "utils.h"
#include "json_print.h"

#include "bpf_util.h"
#include "bpf_elf.h"
#include "bpf_scm.h"

struct bpf_prog_meta {
	const char *type;
	const char *subdir;
	const char *section;
	bool may_uds_export;
};

static const enum bpf_prog_type __bpf_types[] = {
	BPF_PROG_TYPE_SCHED_CLS,
	BPF_PROG_TYPE_SCHED_ACT,
	BPF_PROG_TYPE_XDP,
	BPF_PROG_TYPE_LWT_IN,
	BPF_PROG_TYPE_LWT_OUT,
	BPF_PROG_TYPE_LWT_XMIT,
};

static const struct bpf_prog_meta __bpf_prog_meta[] = {
	[BPF_PROG_TYPE_SCHED_CLS] = {
		.type		= "cls",
		.subdir		= "tc",
		.section	= ELF_SECTION_CLASSIFIER,
		.may_uds_export	= true,
	},
	[BPF_PROG_TYPE_SCHED_ACT] = {
		.type		= "act",
		.subdir		= "tc",
		.section	= ELF_SECTION_ACTION,
		.may_uds_export	= true,
	},
	[BPF_PROG_TYPE_XDP] = {
		.type		= "xdp",
		.subdir		= "xdp",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_IN] = {
		.type		= "lwt_in",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_OUT] = {
		.type		= "lwt_out",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_XMIT] = {
		.type		= "lwt_xmit",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
};

static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
{
	assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
	       __bpf_prog_meta[type].subdir);
	return __bpf_prog_meta[type].subdir;
}

const char *bpf_prog_to_default_section(enum bpf_prog_type type)
{
	assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
	       __bpf_prog_meta[type].section);
	return __bpf_prog_meta[type].section;
}

#ifdef HAVE_ELF
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose);
#else
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose)
{
	fprintf(stderr, "No ELF library support compiled in.\n");
	errno = ENOSYS;
	return -1;
}
#endif

static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	return (__u64)(unsigned long)ptr;
}

static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
	return syscall(__NR_bpf, cmd, attr, size);
#else
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#endif
}
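
/* Example (illustrative sketch, not used by this file): creating a map
 * through the raw bpf() wrapper above. All attributes that are not set
 * explicitly must remain zeroed, which is why all callers below start
 * from "union bpf_attr attr = {};":
 *
 *	union bpf_attr attr = {};
 *
 *	attr.map_type    = BPF_MAP_TYPE_ARRAY;
 *	attr.key_size    = sizeof(uint32_t);
 *	attr.value_size  = sizeof(uint32_t);
 *	attr.max_entries = 1;
 *
 *	int fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 */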

static int bpf_map_update(int fd, const void *key, const void *value,
			  uint64_t flags)
{
	union bpf_attr attr = {};

	attr.map_fd = fd;
	attr.key = bpf_ptr_to_u64(key);
	attr.value = bpf_ptr_to_u64(value);
	attr.flags = flags;

	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}

static int bpf_prog_fd_by_id(uint32_t id)
{
	union bpf_attr attr = {};

	attr.prog_id = id;

	return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}

static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
			       uint32_t *info_len)
{
	union bpf_attr attr = {};
	int ret;

	attr.info.bpf_fd = fd;
	attr.info.info = bpf_ptr_to_u64(info);
	attr.info.info_len = *info_len;

	*info_len = 0;
	ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
	if (!ret)
		*info_len = attr.info.info_len;

	return ret;
}

int bpf_dump_prog_info(FILE *f, uint32_t id)
{
	struct bpf_prog_info info = {};
	uint32_t len = sizeof(info);
	int fd, ret, dump_ok = 0;
	SPRINT_BUF(tmp);

	open_json_object("prog");
	print_uint(PRINT_ANY, "id", "id %u ", id);

	fd = bpf_prog_fd_by_id(id);
	if (fd < 0)
		goto out;

	ret = bpf_prog_info_by_fd(fd, &info, &len);
	if (!ret && len) {
		int jited = !!info.jited_prog_len;

		print_string(PRINT_ANY, "tag", "tag %s ",
			     hexstring_n2a(info.tag, sizeof(info.tag),
					   tmp, sizeof(tmp)));
		print_uint(PRINT_JSON, "jited", NULL, jited);
		if (jited && !is_json_context())
			fprintf(f, "jited ");
		dump_ok = 1;
	}

	close(fd);
out:
	close_json_object();
	return dump_ok;
}

static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char sp;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string, *pos, c_prev = ' ';
		FILE *fp;
		int c;

		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		tmp_string = pos = calloc(1, tmp_len);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		while ((c = fgetc(fp)) != EOF) {
			switch (c) {
			case '\n':
				if (c_prev != ',')
					*(pos++) = ',';
				c_prev = ',';
				break;
			case ' ':
			case '\t':
				if (c_prev != ' ')
					*(pos++) = c;
				c_prev = ' ';
				break;
			default:
				*(pos++) = c;
				c_prev = c;
			}
			if (pos - tmp_string == tmp_len)
				break;
		}

		if (!feof(fp)) {
			free(tmp_string);
			fclose(fp);
			return -E2BIG;
		}

		fclose(fp);
		*pos = 0;

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*need_release = false;
		*bpf_string = arg;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release)
			free(*bpf_string);
		return -EINVAL;
	}

	return 0;
}
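
/* The string format consumed by bpf_ops_parse() below is the same one
 * that bpf_print_ops() further down emits: the instruction count first,
 * then one "code jt jf k" tuple per instruction, comma-separated. A
 * hypothetical two-instruction cBPF program would thus read:
 *
 *	2,6 0 0 65535,6 0 0 0
 *
 * When reading from a file, newlines are normalized to the separator
 * and runs of spaces/tabs are collapsed by bpf_parse_string() above.
 */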

static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}

void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	struct sock_filter *ops = RTA_DATA(bpf_ops);
	int i;

	if (len == 0)
		return;

	fprintf(f, "bytecode \'%u,", len);

	for (i = 0; i < len - 1; i++)
		fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
			ops[i].jf, ops[i].k);

	fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
		ops[i].jf, ops[i].k);
}

static void bpf_map_pin_report(const struct bpf_elf_map *pin,
			       const struct bpf_elf_map *obj)
{
	fprintf(stderr, "Map specification differs from pinned file!\n");

	if (obj->type != pin->type)
		fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
			obj->type, pin->type);
	if (obj->size_key != pin->size_key)
		fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
			obj->size_key, pin->size_key);
	if (obj->size_value != pin->size_value)
		fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
			obj->size_value, pin->size_value);
	if (obj->max_elem != pin->max_elem)
		fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
			obj->max_elem, pin->max_elem);
	if (obj->flags != pin->flags)
		fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
			obj->flags, pin->flags);

	fprintf(stderr, "\n");
}

struct bpf_prog_data {
	unsigned int type;
	unsigned int jited;
};

struct bpf_map_ext {
	struct bpf_prog_data owner;
};

static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map,
					  struct bpf_map_ext *ext)
{
	unsigned int val, owner_type = 0, owner_jited = 0;
	char file[PATH_MAX], buff[4096];
	FILE *fp;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(map, 0, sizeof(*map));

	fp = fopen(file, "r");
	if (!fp) {
		fprintf(stderr, "No procfs support?!\n");
		return -EIO;
	}

	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "map_type:\t%u", &val) == 1)
			map->type = val;
		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
			map->size_key = val;
		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
			map->size_value = val;
		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
			map->max_elem = val;
		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
			map->flags = val;
		else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
			owner_type = val;
		else if (sscanf(buff, "owner_jited:\t%i", &val) == 1)
			owner_jited = val;
	}

	fclose(fp);
	if (ext) {
		memset(ext, 0, sizeof(*ext));
		ext->owner.type = owner_type;
		ext->owner.jited = owner_jited;
	}

	return 0;
}
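
/* bpf_derive_elf_map_from_fdinfo() above parses lines of the form the
 * kernel exposes in /proc/<pid>/fdinfo/<fd> for map fds, e.g.:
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	256
 *	map_flags:	0x0
 *
 * On kernels without this fdinfo output the derived map stays all-zero,
 * which bpf_map_selfcheck_pinned() below deliberately accepts since it
 * cannot verify anything in that case.
 */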

static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
				    struct bpf_map_ext *ext, int length,
				    enum bpf_prog_type type)
{
	struct bpf_elf_map tmp, zero = {};
	int ret;

	ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext);
	if (ret < 0)
		return ret;

	/* The decision to reject this is on kernel side eventually, but
	 * at least give the user a chance to know what's wrong.
	 */
	if (ext->owner.type && ext->owner.type != type)
		fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n",
			type, ext->owner.type);

	if (!memcmp(&tmp, map, length)) {
		return 0;
	} else {
		/* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
		 * so just accept it. We know we do have an eBPF fd and in this
		 * case, everything is 0. It is guaranteed that no such map exists
		 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
		 */
		if (!memcmp(&tmp, &zero, length))
			return 0;

		bpf_map_pin_report(&tmp, map);
		return -EINVAL;
	}
}

static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		if (errno != EINVAL || bind_done) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, "mode=0700")) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}

static int bpf_mnt_check_target(const char *target)
{
	struct stat sb = {};
	int ret;

	ret = stat(target, &sb);
	if (ret) {
		ret = mkdir(target, S_IRWXU);
		if (ret) {
			fprintf(stderr, "mkdir %s failed: %s\n", target,
				strerror(errno));
			return ret;
		}
	}

	return 0;
}

static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs st_fs;

	if (statfs(mnt, &st_fs) < 0)
		return -ENOENT;
	if ((unsigned long)st_fs.f_type != magic)
		return -ENOENT;

	return 0;
}

static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt,
					 int len, const char *mntpt)
{
	int ret;

	ret = bpf_valid_mntpt(mntpt, magic);
	if (!ret) {
		strlcpy(mnt, mntpt, len);
		return mnt;
	}

	return NULL;
}

static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
				  char *mnt, int len,
				  const char * const *known_mnts)
{
	const char * const *ptr;
	char type[100];
	FILE *fp;

	if (known_mnts) {
		ptr = known_mnts;
		while (*ptr) {
			if (bpf_find_mntpt_single(magic, mnt, len, *ptr))
				return mnt;
			ptr++;
		}
	}

	if (len != PATH_MAX)
		return NULL;

	fp = fopen("/proc/mounts", "r");
	if (fp == NULL)
		return NULL;

	while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
		      mnt, type) == 2) {
		if (strcmp(type, fstype) == 0)
			break;
	}

	fclose(fp);
	if (strcmp(type, fstype) != 0)
		return NULL;

	return mnt;
}
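
/* bpf_mnt_fs() above is roughly the shell sequence its error messages
 * hint at; if making the mount point private fails with EINVAL, the
 * target is first bind-mounted onto itself and the step is retried:
 *
 *	mount --bind <target> <target>
 *	mount --make-private <target>
 *	mount -t bpf bpf <target> -o mode=0700
 */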

int bpf_trace_pipe(void)
{
	char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
	static const char * const tracefs_known_mnts[] = {
		TRACE_DIR_MNT,
		"/sys/kernel/debug/tracing",
		"/tracing",
		"/trace",
		0,
	};
	int fd_in, fd_out = STDERR_FILENO;
	char tpipe[PATH_MAX];
	const char *mnt;

	mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
			     sizeof(tracefs_mnt), tracefs_known_mnts);
	if (!mnt) {
		fprintf(stderr, "tracefs not mounted?\n");
		return -1;
	}

	snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);

	fd_in = open(tpipe, O_RDONLY);
	if (fd_in < 0)
		return -1;

	fprintf(stderr, "Running! Hang up with ^C!\n\n");
	while (1) {
		static char buff[4096];
		ssize_t ret;

		ret = read(fd_in, buff, sizeof(buff));
		if (ret > 0 && write(fd_out, buff, ret) == ret)
			continue;
		break;
	}

	close(fd_in);
	return -1;
}

static int bpf_gen_global(const char *bpf_sub_dir)
{
	char bpf_glo_dir[PATH_MAX];
	int ret;

	snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/",
		 bpf_sub_dir, BPF_DIR_GLOBALS);

	ret = mkdir(bpf_glo_dir, S_IRWXU);
	if (ret && errno != EEXIST) {
		fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
			strerror(errno));
		return ret;
	}

	return 0;
}

static int bpf_gen_master(const char *base, const char *name)
{
	char bpf_sub_dir[PATH_MAX];
	int ret;

	snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name);

	ret = mkdir(bpf_sub_dir, S_IRWXU);
	if (ret && errno != EEXIST) {
		fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir,
			strerror(errno));
		return ret;
	}

	return bpf_gen_global(bpf_sub_dir);
}

static int bpf_slave_via_bind_mnt(const char *full_name,
				  const char *full_link)
{
	int ret;

	ret = mkdir(full_name, S_IRWXU);
	if (ret) {
		assert(errno != EEXIST);
		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
			strerror(errno));
		return ret;
	}

	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
	if (ret) {
		rmdir(full_name);
		fprintf(stderr, "mount --bind %s %s failed: %s\n",
			full_link, full_name, strerror(errno));
	}

	return ret;
}

static int bpf_gen_slave(const char *base, const char *name,
			 const char *link)
{
	char bpf_lnk_dir[PATH_MAX];
	char bpf_sub_dir[PATH_MAX];
	struct stat sb = {};
	int ret;

	snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
	snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);

	ret = symlink(bpf_lnk_dir, bpf_sub_dir);
	if (ret) {
		if (errno != EEXIST) {
			if (errno != EPERM) {
				fprintf(stderr, "symlink %s failed: %s\n",
					bpf_sub_dir, strerror(errno));
				return ret;
			}

			return bpf_slave_via_bind_mnt(bpf_sub_dir,
						      bpf_lnk_dir);
		}

		ret = lstat(bpf_sub_dir, &sb);
		if (ret) {
			fprintf(stderr, "lstat %s failed: %s\n",
				bpf_sub_dir, strerror(errno));
			return ret;
		}

		if ((sb.st_mode & S_IFMT) != S_IFLNK)
			return bpf_gen_global(bpf_sub_dir);
	}

	return 0;
}

static int bpf_gen_hierarchy(const char *base)
{
	int ret, i;

	ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
	for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++)
		ret = bpf_gen_slave(base,
				    bpf_prog_to_subdir(__bpf_types[i]),
				    bpf_prog_to_subdir(__bpf_types[0]));
	return ret;
}
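
/* With the defaults above (__bpf_types[0] being BPF_PROG_TYPE_SCHED_CLS,
 * i.e. subdir "tc"), bpf_gen_hierarchy() produces a layout along the
 * lines of the following under a default bpf fs mount:
 *
 *	/sys/fs/bpf/tc/
 *	/sys/fs/bpf/tc/globals/
 *	/sys/fs/bpf/xdp -> /sys/fs/bpf/tc/	(symlink, or bind mount
 *	/sys/fs/bpf/ip  -> /sys/fs/bpf/tc/	 if symlinking is denied)
 */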

static const char *bpf_get_work_dir(enum bpf_prog_type type)
{
	static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
	static char bpf_wrk_dir[PATH_MAX];
	static const char *mnt;
	static bool bpf_mnt_cached;
	const char *mnt_env = getenv(BPF_ENV_MNT);
	static const char * const bpf_known_mnts[] = {
		BPF_DIR_MNT,
		"/bpf",
		0,
	};
	int ret;

	if (bpf_mnt_cached) {
		const char *out = mnt;

		if (out && type) {
			snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
				 out, bpf_prog_to_subdir(type));
			out = bpf_tmp;
		}
		return out;
	}

	if (mnt_env)
		mnt = bpf_find_mntpt_single(BPF_FS_MAGIC, bpf_tmp,
					    sizeof(bpf_tmp), mnt_env);
	else
		mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp,
				     sizeof(bpf_tmp), bpf_known_mnts);
	if (!mnt) {
		mnt = mnt_env ? : BPF_DIR_MNT;
		ret = bpf_mnt_check_target(mnt);
		if (!ret)
			ret = bpf_mnt_fs(mnt);
		if (ret) {
			mnt = NULL;
			goto out;
		}
	}

	snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);

	ret = bpf_gen_hierarchy(bpf_wrk_dir);
	if (ret) {
		mnt = NULL;
		goto out;
	}

	mnt = bpf_wrk_dir;
out:
	bpf_mnt_cached = true;
	return mnt;
}

static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
{
	union bpf_attr attr = {};
	char tmp[PATH_MAX];

	if (strlen(pathname) > 2 && pathname[0] == 'm' &&
	    pathname[1] == ':' && bpf_get_work_dir(type)) {
		snprintf(tmp, sizeof(tmp), "%s/%s",
			 bpf_get_work_dir(type), pathname + 2);
		pathname = tmp;
	}

	attr.pathname = bpf_ptr_to_u64(pathname);

	return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
}

static int bpf_obj_pinned(const char *pathname, enum bpf_prog_type type)
{
	int prog_fd = bpf_obj_get(pathname, type);

	if (prog_fd < 0)
		fprintf(stderr, "Couldn\'t retrieve pinned program \'%s\': %s\n",
			pathname, strerror(errno));
	return prog_fd;
}
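
/* bpf_obj_get() above accepts an "m:" prefix as a shorthand that is
 * resolved relative to the per-type working directory, e.g. with a
 * hypothetical pinned map and a default mount:
 *
 *	bpf_obj_get("m:globals/jmp_map", BPF_PROG_TYPE_SCHED_CLS);
 *
 * expands to /sys/fs/bpf/tc/globals/jmp_map.
 */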

enum bpf_mode {
	CBPF_BYTECODE,
	CBPF_FILE,
	EBPF_OBJECT,
	EBPF_PINNED,
	BPF_MODE_MAX,
};

static int bpf_parse(enum bpf_prog_type *type, enum bpf_mode *mode,
		     struct bpf_cfg_in *cfg, const bool *opt_tbl)
{
	const char *file, *section, *uds_name;
	bool verbose = false;
	int i, ret, argc;
	char **argv;

	argv = cfg->argv;
	argc = cfg->argc;

	if (opt_tbl[CBPF_BYTECODE] &&
	    (matches(*argv, "bytecode") == 0 ||
	     strcmp(*argv, "bc") == 0)) {
		*mode = CBPF_BYTECODE;
	} else if (opt_tbl[CBPF_FILE] &&
		   (matches(*argv, "bytecode-file") == 0 ||
		    strcmp(*argv, "bcf") == 0)) {
		*mode = CBPF_FILE;
	} else if (opt_tbl[EBPF_OBJECT] &&
		   (matches(*argv, "object-file") == 0 ||
		    strcmp(*argv, "obj") == 0)) {
		*mode = EBPF_OBJECT;
	} else if (opt_tbl[EBPF_PINNED] &&
		   (matches(*argv, "object-pinned") == 0 ||
		    matches(*argv, "pinned") == 0 ||
		    matches(*argv, "fd") == 0)) {
		*mode = EBPF_PINNED;
	} else {
		fprintf(stderr, "What mode is \"%s\"?\n", *argv);
		return -1;
	}

	NEXT_ARG();
	file = section = uds_name = NULL;
	if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
		file = *argv;
		NEXT_ARG_FWD();

		if (*type == BPF_PROG_TYPE_UNSPEC) {
			if (argc > 0 && matches(*argv, "type") == 0) {
				NEXT_ARG();
				for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
				     i++) {
					if (!__bpf_prog_meta[i].type)
						continue;
					if (!matches(*argv,
						     __bpf_prog_meta[i].type)) {
						*type = i;
						break;
					}
				}

				if (*type == BPF_PROG_TYPE_UNSPEC) {
					fprintf(stderr, "What type is \"%s\"?\n",
						*argv);
					return -1;
				}
				NEXT_ARG_FWD();
			} else {
				*type = BPF_PROG_TYPE_SCHED_CLS;
			}
		}

		section = bpf_prog_to_default_section(*type);
		if (argc > 0 && matches(*argv, "section") == 0) {
			NEXT_ARG();
			section = *argv;
			NEXT_ARG_FWD();
		}

		if (__bpf_prog_meta[*type].may_uds_export) {
			uds_name = getenv(BPF_ENV_UDS);
			if (argc > 0 && !uds_name &&
			    matches(*argv, "export") == 0) {
				NEXT_ARG();
				uds_name = *argv;
				NEXT_ARG_FWD();
			}
		}

		if (argc > 0 && matches(*argv, "verbose") == 0) {
			verbose = true;
			NEXT_ARG_FWD();
		}

		PREV_ARG();
	}

	if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
		ret = bpf_ops_parse(argc, argv, cfg->ops, *mode == CBPF_FILE);
	else if (*mode == EBPF_OBJECT)
		ret = bpf_obj_open(file, *type, section, verbose);
	else if (*mode == EBPF_PINNED)
		ret = bpf_obj_pinned(file, *type);
	else
		return -1;

	cfg->object = file;
	cfg->section = section;
	cfg->uds = uds_name;
	cfg->argc = argc;
	cfg->argv = argv;

	return ret;
}

static int bpf_parse_opt_tbl(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
			     const struct bpf_cfg_ops *ops, void *nl,
			     const bool *opt_tbl)
{
	struct sock_filter opcodes[BPF_MAXINSNS];
	char annotation[256];
	enum bpf_mode mode;
	int ret;

	cfg->ops = opcodes;
	ret = bpf_parse(&type, &mode, cfg, opt_tbl);
	cfg->ops = NULL;
	if (ret < 0)
		return ret;

	if (mode == CBPF_BYTECODE || mode == CBPF_FILE)
		ops->cbpf_cb(nl, opcodes, ret);
	if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
		snprintf(annotation, sizeof(annotation), "%s:[%s]",
			 basename(cfg->object), mode == EBPF_PINNED ?
			 "*fsobj" : cfg->section);
		ops->ebpf_cb(nl, ret, annotation);
	}

	return 0;
}

int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
		     const struct bpf_cfg_ops *ops, void *nl)
{
	bool opt_tbl[BPF_MODE_MAX] = {};

	if (ops->cbpf_cb) {
		opt_tbl[CBPF_BYTECODE] = true;
		opt_tbl[CBPF_FILE] = true;
	}

	if (ops->ebpf_cb) {
		opt_tbl[EBPF_OBJECT] = true;
		opt_tbl[EBPF_PINNED] = true;
	}

	return bpf_parse_opt_tbl(type, cfg, ops, nl, opt_tbl);
}
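
/* The grammar handled by bpf_parse() maps onto command lines such as
 * the following illustrative tc invocation:
 *
 *	tc filter add dev em1 ingress bpf obj prog.o section p1 verbose
 *
 * "bytecode"/"bc" and "bytecode-file"/"bcf" select the two cBPF modes,
 * while "object-pinned"/"pinned"/"fd" picks up an already pinned
 * program instead of loading an ELF object.
 */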

int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
{
	enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
	const bool opt_tbl[BPF_MODE_MAX] = {
		[EBPF_OBJECT]	= true,
		[EBPF_PINNED]	= true,
	};
	const struct bpf_elf_map test = {
		.type		= BPF_MAP_TYPE_PROG_ARRAY,
		.size_key	= sizeof(int),
		.size_value	= sizeof(int),
	};
	struct bpf_cfg_in cfg = {
		.argc = argc,
		.argv = argv,
	};
	struct bpf_map_ext ext = {};
	int ret, prog_fd, map_fd;
	enum bpf_mode mode;
	uint32_t map_key;

	prog_fd = bpf_parse(&type, &mode, &cfg, opt_tbl);
	if (prog_fd < 0)
		return prog_fd;
	if (key) {
		map_key = *key;
	} else {
		ret = sscanf(cfg.section, "%*i/%i", &map_key);
		if (ret != 1) {
			fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
			ret = -EINVAL;
			goto out_prog;
		}
	}

	map_fd = bpf_obj_get(map_path, type);
	if (map_fd < 0) {
		fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
			map_path, strerror(errno));
		ret = map_fd;
		goto out_prog;
	}

	ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext,
				       offsetof(struct bpf_elf_map, max_elem),
				       type);
	if (ret < 0) {
		fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
		goto out_map;
	}

	ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
	if (ret < 0)
		fprintf(stderr, "Map update failed: %s\n", strerror(errno));
out_map:
	close(map_fd);
out_prog:
	close(prog_fd);
	return ret;
}

int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr = {};

	attr.target_fd = target_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = type;

	return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}

int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr = {};

	attr.target_fd = target_fd;
	attr.attach_type = type;

	return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}

int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
		  size_t size_insns, const char *license, char *log,
		  size_t size_log)
{
	union bpf_attr attr = {};

	attr.prog_type = type;
	attr.insns = bpf_ptr_to_u64(insns);
	attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
	attr.license = bpf_ptr_to_u64(license);

	if (size_log > 0) {
		attr.log_buf = bpf_ptr_to_u64(log);
		attr.log_size = size_log;
		attr.log_level = 1;
	}

	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
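
/* A minimal sketch of driving bpf_prog_load() directly (not part of
 * this file); the two raw instructions encode "r0 = 0; exit", i.e. the
 * smallest valid program:
 *
 *	const struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
 *		  .dst_reg = BPF_REG_0, .imm = 0 },
 *		{ .code = BPF_JMP | BPF_EXIT },
 *	};
 *	char log[4096];
 *
 *	int fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns,
 *			       sizeof(insns), "GPL", log, sizeof(log));
 */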

#ifdef HAVE_ELF
struct bpf_elf_prog {
	enum bpf_prog_type	type;
	const struct bpf_insn	*insns;
	size_t			size;
	const char		*license;
};

struct bpf_hash_entry {
	unsigned int		pinning;
	const char		*subpath;
	struct bpf_hash_entry	*next;
};

struct bpf_config {
	unsigned int		jit_enabled;
};

struct bpf_elf_ctx {
	struct bpf_config	cfg;
	Elf			*elf_fd;
	GElf_Ehdr		elf_hdr;
	Elf_Data		*sym_tab;
	Elf_Data		*str_tab;
	int			obj_fd;
	int			map_fds[ELF_MAX_MAPS];
	struct bpf_elf_map	maps[ELF_MAX_MAPS];
	struct bpf_map_ext	maps_ext[ELF_MAX_MAPS];
	int			sym_num;
	int			map_num;
	int			map_len;
	bool			*sec_done;
	int			sec_maps;
	char			license[ELF_MAX_LICENSE_LEN];
	enum bpf_prog_type	type;
	bool			verbose;
	struct bpf_elf_st	stat;
	struct bpf_hash_entry	*ht[256];
	char			*log;
	size_t			log_size;
};

struct bpf_elf_sec_data {
	GElf_Shdr		sec_hdr;
	Elf_Data		*sec_data;
	const char		*sec_name;
};

struct bpf_map_data {
	int			*fds;
	const char		*obj;
	struct bpf_elf_st	*st;
	struct bpf_elf_map	*ent;
};

static __check_format_string(2, 3) void
bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
{
	va_list vl;

	va_start(vl, format);
	vfprintf(stderr, format, vl);
	va_end(vl);

	if (ctx->log && ctx->log[0]) {
		if (ctx->verbose) {
			fprintf(stderr, "%s\n", ctx->log);
		} else {
			unsigned int off = 0, len = strlen(ctx->log);

			if (len > BPF_MAX_LOG) {
				off = len - BPF_MAX_LOG;
				fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
					off);
			}
			fprintf(stderr, "%s\n", ctx->log + off);
		}

		memset(ctx->log, 0, ctx->log_size);
	}
}

static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
{
	const size_t log_max = UINT_MAX >> 8;
	size_t log_size = ctx->log_size;
	void *ptr;

	if (!ctx->log) {
		log_size = 65536;
	} else if (log_size < log_max) {
		log_size <<= 1;
		if (log_size > log_max)
			log_size = log_max;
	} else {
		return -EINVAL;
	}

	ptr = realloc(ctx->log, log_size);
	if (!ptr)
		return -ENOMEM;

	ctx->log = ptr;
	ctx->log_size = log_size;

	return 0;
}

static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
			  uint32_t size_value, uint32_t max_elem,
			  uint32_t flags, int inner_fd)
{
	union bpf_attr attr = {};

	attr.map_type = type;
	attr.key_size = size_key;
	attr.value_size = inner_fd ? sizeof(int) : size_value;
	attr.max_entries = max_elem;
	attr.map_flags = flags;
	attr.inner_map_fd = inner_fd;

	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}

static int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr attr = {};

	attr.pathname = bpf_ptr_to_u64(pathname);
	attr.bpf_fd = fd;

	return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
}
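
/* The SHA-1 digest computed by bpf_obj_hash() below over the entire ELF
 * object is what names the per-object pinning directory (PIN_OBJECT_NS,
 * see bpf_make_pathname() further down); the same value can be
 * reproduced outside of this code with, e.g., "sha1sum prog.o".
 */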

static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
			size, stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	size = read(ofd, out, len);
	if (size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}

static const char *bpf_get_obj_uid(const char *pathname)
{
	static bool bpf_uid_cached;
	static char bpf_uid[64];
	uint8_t tmp[20];
	int ret;

	if (bpf_uid_cached)
		goto done;

	ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
	if (ret) {
		fprintf(stderr, "Object hashing failed!\n");
		return NULL;
	}

	hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid));
	bpf_uid_cached = true;
done:
	return bpf_uid;
}

static int bpf_init_env(const char *pathname)
{
	struct rlimit limit = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

	/* Don't bother in case we fail! */
	setrlimit(RLIMIT_MEMLOCK, &limit);

	if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) {
		fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
		return 0;
	}

	if (!bpf_get_obj_uid(pathname))
		return -1;

	return 0;
}

static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
				      uint32_t pinning)
{
	struct bpf_hash_entry *entry;

	entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
	while (entry && entry->pinning != pinning)
		entry = entry->next;

	return entry ? entry->subpath : NULL;
}

static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
			   uint32_t pinning)
{
	switch (pinning) {
	case PIN_OBJECT_NS:
	case PIN_GLOBAL_NS:
		return false;
	case PIN_NONE:
		return true;
	default:
		return !bpf_custom_pinning(ctx, pinning);
	}
}

static void bpf_make_pathname(char *pathname, size_t len, const char *name,
			      const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
	switch (pinning) {
	case PIN_OBJECT_NS:
		snprintf(pathname, len, "%s/%s/%s",
			 bpf_get_work_dir(ctx->type),
			 bpf_get_obj_uid(NULL), name);
		break;
	case PIN_GLOBAL_NS:
		snprintf(pathname, len, "%s/%s/%s",
			 bpf_get_work_dir(ctx->type),
			 BPF_DIR_GLOBALS, name);
		break;
	default:
		snprintf(pathname, len, "%s/../%s/%s",
			 bpf_get_work_dir(ctx->type),
			 bpf_custom_pinning(ctx, pinning), name);
		break;
	}
}

static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
			    uint32_t pinning)
{
	char pathname[PATH_MAX];

	if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
		return 0;

	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
	return bpf_obj_get(pathname, ctx->type);
}
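
/* For a hypothetical map named "map_foo" in a tc program, the pinning
 * modes handled by bpf_make_pathname() above resolve to paths like the
 * following, assuming the default mount point:
 *
 *	PIN_OBJECT_NS:	/sys/fs/bpf/tc/<sha1-of-object>/map_foo
 *	PIN_GLOBAL_NS:	/sys/fs/bpf/tc/globals/map_foo
 *	custom id:	/sys/fs/bpf/<subpath-from-pinning-db>/map_foo
 */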

static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
{
	char tmp[PATH_MAX];
	int ret;

	snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type),
		 bpf_get_obj_uid(NULL));

	ret = mkdir(tmp, S_IRWXU);
	if (ret && errno != EEXIST) {
		fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
		return ret;
	}

	return 0;
}

static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
				const char *todo)
{
	char tmp[PATH_MAX], rem[PATH_MAX], *sub;
	int ret;

	snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type));
	snprintf(rem, sizeof(rem), "%s/", todo);
	sub = strtok(rem, "/");

	while (sub) {
		if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
			return -EINVAL;

		strcat(tmp, sub);
		strcat(tmp, "/");

		ret = mkdir(tmp, S_IRWXU);
		if (ret && errno != EEXIST) {
			fprintf(stderr, "mkdir %s failed: %s\n", tmp,
				strerror(errno));
			return ret;
		}

		sub = strtok(NULL, "/");
	}

	return 0;
}

static int bpf_place_pinned(int fd, const char *name,
			    const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
	char pathname[PATH_MAX];
	const char *tmp;
	int ret = 0;

	if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
		return 0;

	if (pinning == PIN_OBJECT_NS)
		ret = bpf_make_obj_path(ctx);
	else if ((tmp = bpf_custom_pinning(ctx, pinning)))
		ret = bpf_make_custom_path(ctx, tmp);
	if (ret < 0)
		return ret;

	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
	return bpf_obj_pin(fd, pathname);
}

static void bpf_prog_report(int fd, const char *section,
			    const struct bpf_elf_prog *prog,
			    struct bpf_elf_ctx *ctx)
{
	unsigned int insns = prog->size / sizeof(struct bpf_insn);

	fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
		fd < 0 ? "rejected: " : "loaded",
		fd < 0 ? strerror(errno) : "",
		fd < 0 ? errno : fd);

	fprintf(stderr, " - Type: %u\n", prog->type);
	fprintf(stderr, " - Instructions: %u (%u over limit)\n",
		insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
	fprintf(stderr, " - License: %s\n\n", prog->license);

	bpf_dump_error(ctx, "Verifier analysis:\n\n");
}

static int bpf_prog_attach(const char *section,
			   const struct bpf_elf_prog *prog,
			   struct bpf_elf_ctx *ctx)
{
	int tries = 0, fd;
retry:
	errno = 0;
	fd = bpf_prog_load(prog->type, prog->insns, prog->size,
			   prog->license, ctx->log, ctx->log_size);
	if (fd < 0 || ctx->verbose) {
		/* The verifier log is pretty chatty, sometimes so chatty
		 * on larger programs, that we could fail to dump everything
		 * into our buffer. Still, try to give a debuggable error
		 * log for the user, so enlarge it and re-fail.
		 */
		if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
			if (tries++ < 10 && !bpf_log_realloc(ctx))
				goto retry;

			fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
				ctx->log_size, tries);
			return fd;
		}

		bpf_prog_report(fd, section, prog, ctx);
	}

	return fd;
}

static void bpf_map_report(int fd, const char *name,
			   const struct bpf_elf_map *map,
			   struct bpf_elf_ctx *ctx, int inner_fd)
{
	fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
		fd < 0 ? "rejected: " : "loaded",
		fd < 0 ? strerror(errno) : "",
		fd < 0 ? errno : fd);

	fprintf(stderr, " - Type: %u\n", map->type);
	fprintf(stderr, " - Identifier: %u\n", map->id);
	fprintf(stderr, " - Pinning: %u\n", map->pinning);
	fprintf(stderr, " - Size key: %u\n", map->size_key);
	fprintf(stderr, " - Size value: %u\n",
		inner_fd ? (int)sizeof(int) : map->size_value);
	fprintf(stderr, " - Max elems: %u\n", map->max_elem);
	fprintf(stderr, " - Flags: %#x\n\n", map->flags);
}

static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
{
	int i;

	for (i = 0; i < ctx->map_num; i++) {
		if (ctx->maps[i].id != id)
			continue;
		if (ctx->map_fds[i] < 0)
			return -EINVAL;

		return ctx->map_fds[i];
	}

	return -ENOENT;
}

static void bpf_report_map_in_map(int outer_fd, uint32_t idx)
{
	struct bpf_elf_map outer_map;
	int ret;

	fprintf(stderr, "Cannot insert map into map! ");

	ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL);
	if (!ret) {
		if (idx >= outer_map.max_elem &&
		    outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
			fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n",
				outer_map.max_elem, idx);
			return;
		}
	}

	fprintf(stderr, "Different map specs used for outer and inner map?\n");
}

static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
{
	return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
	       map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
}

static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx,
			  const struct bpf_elf_map *map, struct bpf_map_ext *ext,
			  int *have_map_in_map)
{
	int fd, ret, map_inner_fd = 0;

	fd = bpf_probe_pinned(name, ctx, map->pinning);
	if (fd > 0) {
		ret = bpf_map_selfcheck_pinned(fd, map, ext,
					       offsetof(struct bpf_elf_map,
							id), ctx->type);
		if (ret < 0) {
			close(fd);
			fprintf(stderr, "Map \'%s\' self-check failed!\n",
				name);
			return ret;
		}
		if (ctx->verbose)
			fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
				name);
		return fd;
	}

	if (have_map_in_map && bpf_is_map_in_map_type(map)) {
		(*have_map_in_map)++;
		if (map->inner_id)
			return 0;
		fprintf(stderr, "Map \'%s\' cannot be created since no inner map ID defined!\n",
			name);
		return -EINVAL;
	}

	if (!have_map_in_map && bpf_is_map_in_map_type(map)) {
		map_inner_fd = bpf_find_map_id(ctx, map->inner_id);
		if (map_inner_fd < 0) {
			fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
				name, map->inner_id);
			return -EINVAL;
		}
	}

	errno = 0;
	fd = bpf_map_create(map->type, map->size_key, map->size_value,
			    map->max_elem, map->flags, map_inner_fd);
	if (fd < 0 || ctx->verbose) {
		bpf_map_report(fd, name, map, ctx, map_inner_fd);
		if (fd < 0)
			return fd;
	}

	ret = bpf_place_pinned(fd, name, ctx, map->pinning);
	if (ret < 0 && errno != EEXIST) {
		fprintf(stderr, "Could not pin %s map: %s\n", name,
			strerror(errno));
		close(fd);
		return ret;
	}

	return fd;
}

static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
				    const GElf_Sym *sym)
{
	return ctx->str_tab->d_buf + sym->st_name;
}

static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
{
	GElf_Sym sym;
	int i;

	for (i = 0; i < ctx->sym_num; i++) {
		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
			continue;

		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
		    sym.st_shndx != ctx->sec_maps ||
		    sym.st_value / ctx->map_len != which)
			continue;

		return bpf_str_tab_name(ctx, &sym);
	}

	return NULL;
}

static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
{
	int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
	const char *map_name;

	for (i = 0; i < ctx->map_num; i++) {
		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], &have_map_in_map);
		if (fd < 0)
			return fd;

		ctx->map_fds[i] = !fd ? -1 : fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		if (ctx->map_fds[i] >= 0)
			continue;

		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], NULL);
		if (fd < 0)
			return fd;

		ctx->map_fds[i] = fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		if (!ctx->maps[i].id ||
		    ctx->maps[i].inner_id ||
		    ctx->maps[i].inner_idx == -1)
			continue;

		inner_fd = ctx->map_fds[i];
		inner_idx = ctx->maps[i].inner_idx;

		for (j = 0; j < ctx->map_num; j++) {
			if (!bpf_is_map_in_map_type(&ctx->maps[j]))
				continue;
			if (ctx->maps[j].inner_id != ctx->maps[i].id)
				continue;

			ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
					     &inner_fd, BPF_ANY);
			if (ret < 0) {
				bpf_report_map_in_map(ctx->map_fds[j],
						      inner_idx);
				return ret;
			}
		}
	}

	return 0;
}

static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
{
	int i, num = 0;
	GElf_Sym sym;

	for (i = 0; i < ctx->sym_num; i++) {
		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
			continue;

		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
		    sym.st_shndx != ctx->sec_maps)
			continue;
		num++;
	}

	return num;
}
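
/* On the object file side, the map definitions fetched by
 * bpf_fetch_maps_begin()/bpf_fetch_maps_end() below stem from global
 * declarations placed into ELF_SECTION_MAPS, e.g. in a program built
 * against bpf_elf.h (illustrative):
 *
 *	struct bpf_elf_map __attribute__((section("maps"), used))
 *	map_foo = {
 *		.type		= BPF_MAP_TYPE_HASH,
 *		.size_key	= sizeof(uint32_t),
 *		.size_value	= sizeof(uint32_t),
 *		.max_elem	= 256,
 *		.pinning	= PIN_GLOBAL_NS,
 *	};
 */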

static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
				 struct bpf_elf_sec_data *data)
{
	Elf_Data *sec_edata;
	GElf_Shdr sec_hdr;
	Elf_Scn *sec_fd;
	char *sec_name;

	memset(data, 0, sizeof(*data));

	sec_fd = elf_getscn(ctx->elf_fd, section);
	if (!sec_fd)
		return -EINVAL;
	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
		return -EIO;

	sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
			      sec_hdr.sh_name);
	if (!sec_name || !sec_hdr.sh_size)
		return -ENOENT;

	sec_edata = elf_getdata(sec_fd, NULL);
	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
		return -EIO;

	memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));

	data->sec_name = sec_name;
	data->sec_data = sec_edata;
	return 0;
}

struct bpf_elf_map_min {
	__u32 type;
	__u32 size_key;
	__u32 size_value;
	__u32 max_elem;
};

static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
				struct bpf_elf_sec_data *data)
{
	ctx->map_num = data->sec_data->d_size;
	ctx->sec_maps = section;
	ctx->sec_done[section] = true;

	if (ctx->map_num > sizeof(ctx->maps)) {
		fprintf(stderr, "Too many BPF maps in ELF section!\n");
		return -ENOMEM;
	}

	memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num);
	return 0;
}

static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
{
	GElf_Sym sym;
	int off, i;

	for (off = 0; off < end; off += ctx->map_len) {
		/* Order doesn't need to be linear here, hence we walk
		 * the table again.
		 */
		for (i = 0; i < ctx->sym_num; i++) {
			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
				continue;
			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
			    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
			    sym.st_shndx != ctx->sec_maps)
				continue;
			if (sym.st_value == off)
				break;
			if (i == ctx->sym_num - 1)
				return -1;
		}
	}

	return off == end ? 0 : -1;
}
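
/* bpf_fetch_maps_end() below tolerates objects compiled against an
 * older, smaller struct bpf_elf_map: any layout between struct
 * bpf_elf_map_min (type/size_key/size_value/max_elem) and the current
 * structure is copied entry by entry into a properly sized fixup
 * array, with the missing trailing members left zeroed.
 */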

static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
	int i, sym_num = bpf_map_num_sym(ctx);
	__u8 *buff;

	if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
		fprintf(stderr, "%u maps not supported in current map section!\n",
			sym_num);
		return -EINVAL;
	}

	if (ctx->map_num % sym_num != 0 ||
	    ctx->map_num % sizeof(__u32) != 0) {
		fprintf(stderr, "Number of BPF map symbols is not a multiple of struct bpf_elf_map!\n");
		return -EINVAL;
	}

	ctx->map_len = ctx->map_num / sym_num;
	if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
		return -EINVAL;
	}

	if (ctx->map_len == sizeof(struct bpf_elf_map)) {
		ctx->map_num = sym_num;
		return 0;
	} else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
		fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
		return -EINVAL;
	} else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
		fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
		return -EINVAL;
	}

	ctx->map_num = sym_num;
	for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
	     i++, buff += ctx->map_len) {
		/* The fixup leaves the rest of the members as zero, which
		 * is fine currently, but the option exists to set some
		 * other default value as well when needed in the future.
		 */
		memcpy(&fixup[i], buff, ctx->map_len);
	}

	memcpy(ctx->maps, fixup, sizeof(fixup));

	printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
	       sizeof(struct bpf_elf_map) - ctx->map_len);
	return 0;
}

static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
			     struct bpf_elf_sec_data *data)
{
	if (data->sec_data->d_size > sizeof(ctx->license))
		return -ENOMEM;

	memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
	ctx->sec_done[section] = true;
	return 0;
}

static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
			    struct bpf_elf_sec_data *data)
{
	ctx->sym_tab = data->sec_data;
	ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
	ctx->sec_done[section] = true;
	return 0;
}

static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
			    struct bpf_elf_sec_data *data)
{
	ctx->str_tab = data->sec_data;
	ctx->sec_done[section] = true;
	return 0;
}

static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
{
	return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
}

static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_sec_data data;
	int i, ret = -1;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0)
			continue;

		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
			ret = bpf_fetch_maps_begin(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
			ret = bpf_fetch_license(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
			 !strcmp(data.sec_name, ".symtab"))
			ret = bpf_fetch_symtab(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
			 !strcmp(data.sec_name, ".strtab"))
			ret = bpf_fetch_strtab(ctx, i, &data);
		if (ret < 0) {
			fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
				i);
			return ret;
		}
	}

	if (bpf_has_map_data(ctx)) {
		ret = bpf_fetch_maps_end(ctx);
		if (ret < 0) {
			fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
			return ret;
		}

		ret = bpf_maps_attach_all(ctx);
		if (ret < 0) {
			fprintf(stderr, "Error loading maps into kernel!\n");
			return ret;
		}
	}

	return ret;
}

static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
			  bool *sseen)
{
	struct bpf_elf_sec_data data;
	struct bpf_elf_prog prog;
	int ret, i, fd = -1;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		if (ctx->sec_done[i])
			continue;

		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0 ||
		    !(data.sec_hdr.sh_type == SHT_PROGBITS &&
		      data.sec_hdr.sh_flags & SHF_EXECINSTR &&
		      !strcmp(data.sec_name, section)))
			continue;

		*sseen = true;

		memset(&prog, 0, sizeof(prog));
		prog.type = ctx->type;
		prog.insns = data.sec_data->d_buf;
		prog.size = data.sec_data->d_size;
		prog.license = ctx->license;

		fd = bpf_prog_attach(section, &prog, ctx);
		if (fd < 0)
			return fd;

		ctx->sec_done[i] = true;
		break;
	}

	return fd;
}

struct bpf_tail_call_props {
	unsigned int total;
	unsigned int jited;
};
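
/* What bpf_apply_relo_data() below performs, shown on a single
 * hypothetical ld64 instruction: the compiler emits
 *
 *	code = BPF_LD | BPF_IMM | BPF_DW, src_reg = 0, imm = 0
 *
 * plus a relocation entry whose symbol lives in the maps section; after
 * relocation the instruction carries the live map reference:
 *
 *	code = BPF_LD | BPF_IMM | BPF_DW, src_reg = BPF_PSEUDO_MAP_FD,
 *	imm = <fd of the map the symbol refers to>
 */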

static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
			       struct bpf_elf_sec_data *data_relo,
			       struct bpf_elf_sec_data *data_insn,
			       struct bpf_tail_call_props *props)
{
	Elf_Data *idata = data_insn->sec_data;
	GElf_Shdr *rhdr = &data_relo->sec_hdr;
	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
	struct bpf_insn *insns = idata->d_buf;
	unsigned int num_insns = idata->d_size / sizeof(*insns);

	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
		unsigned int ioff, rmap;
		GElf_Rel relo;
		GElf_Sym sym;

		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
			return -EIO;

		ioff = relo.r_offset / sizeof(struct bpf_insn);
		if (ioff >= num_insns ||
		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
			fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
				ioff);
			if (ioff < num_insns &&
			    insns[ioff].code == (BPF_JMP | BPF_CALL))
				fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
			return -EINVAL;
		}

		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
			return -EIO;
		if (sym.st_shndx != ctx->sec_maps) {
			fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
				relo_ent, sym.st_shndx);
			return -EIO;
		}

		rmap = sym.st_value / ctx->map_len;
		if (rmap >= ARRAY_SIZE(ctx->map_fds))
			return -EINVAL;
		if (!ctx->map_fds[rmap])
			return -EINVAL;
		if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) {
			props->total++;
			if (ctx->maps_ext[rmap].owner.jited ||
			    (ctx->maps_ext[rmap].owner.type == 0 &&
			     ctx->cfg.jit_enabled))
				props->jited++;
		}

		if (ctx->verbose)
			fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
				data_insn->sec_name, ioff);

		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
		insns[ioff].imm = ctx->map_fds[rmap];
	}

	return 0;
}

static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
			       bool *lderr, bool *sseen)
{
	struct bpf_elf_sec_data data_relo, data_insn;
	struct bpf_elf_prog prog;
	int ret, idx, i, fd = -1;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		struct bpf_tail_call_props props = {};

		ret = bpf_fill_section_data(ctx, i, &data_relo);
		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
			continue;

		idx = data_relo.sec_hdr.sh_info;

		ret = bpf_fill_section_data(ctx, idx, &data_insn);
		if (ret < 0 ||
		    !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
		      data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
		      !strcmp(data_insn.sec_name, section)))
			continue;

		*sseen = true;

		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props);
		if (ret < 0) {
			*lderr = true;
			return ret;
		}

		memset(&prog, 0, sizeof(prog));
		prog.type = ctx->type;
		prog.insns = data_insn.sec_data->d_buf;
		prog.size = data_insn.sec_data->d_size;
		prog.license = ctx->license;

		fd = bpf_prog_attach(section, &prog, ctx);
		if (fd < 0) {
			*lderr = true;
			if (props.total) {
				if (ctx->cfg.jit_enabled &&
				    props.total != props.jited)
					fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
						props.jited, props.total);
				if (!ctx->cfg.jit_enabled &&
				    props.jited)
					fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
						props.jited, props.total);
			}
			return fd;
		}

		ctx->sec_done[i] = true;
		ctx->sec_done[idx] = true;
		break;
	}

	return fd;
}

static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool lderr = false, sseen = false;
	int ret = -1;

	if (bpf_has_map_data(ctx))
		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen);
	if (ret < 0 && !lderr)
		ret = bpf_fetch_prog(ctx, section, &sseen);
	if (ret < 0 && !sseen)
		fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
			section);
	return ret;
}

static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
		if (ctx->map_fds[i] && ctx->maps[i].id == id &&
		    ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
			return i;
	return -1;
}

struct bpf_jited_aux {
	int prog_fd;
	int map_fd;
	struct bpf_prog_data prog;
	struct bpf_map_ext map;
};

static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog)
{
	char file[PATH_MAX], buff[4096];
	unsigned int val;
	FILE *fp;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(prog, 0, sizeof(*prog));

	fp = fopen(file, "r");
	if (!fp) {
		fprintf(stderr, "No procfs support?!\n");
		return -EIO;
	}

	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "prog_type:\t%u", &val) == 1)
			prog->type = val;
		else if (sscanf(buff, "prog_jited:\t%u", &val) == 1)
			prog->jited = val;
	}

	fclose(fp);
	return 0;
}

static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux)
{
	struct bpf_elf_map tmp;
	int ret;

	ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map);
	if (!ret)
		ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog);

	return ret;
}
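
/* bpf_fill_prog_arrays() below wires up tail calls purely by section
 * naming convention: a program placed into ELF section "<id>/<key>" is
 * loaded and installed at slot <key> of the prog array map whose .id
 * member equals <id>, e.g. (hypothetical):
 *
 *	__attribute__((section("1/0"), used))
 *	int tail_call_target(struct __sk_buff *skb) { ... }
 *
 * lands in slot 0 of the prog array declared with .id = 1.
 */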
"is" : "not"); 2231 return ret; 2232 } 2233 2234 ctx->sec_done[i] = true; 2235 } 2236 2237 return 0; 2238 } 2239 2240 static void bpf_save_finfo(struct bpf_elf_ctx *ctx) 2241 { 2242 struct stat st; 2243 int ret; 2244 2245 memset(&ctx->stat, 0, sizeof(ctx->stat)); 2246 2247 ret = fstat(ctx->obj_fd, &st); 2248 if (ret < 0) { 2249 fprintf(stderr, "Stat of elf file failed: %s\n", 2250 strerror(errno)); 2251 return; 2252 } 2253 2254 ctx->stat.st_dev = st.st_dev; 2255 ctx->stat.st_ino = st.st_ino; 2256 } 2257 2258 static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path) 2259 { 2260 char buff[PATH_MAX]; 2261 2262 while (fgets(buff, sizeof(buff), fp)) { 2263 char *ptr = buff; 2264 2265 while (*ptr == ' ' || *ptr == '\t') 2266 ptr++; 2267 2268 if (*ptr == '#' || *ptr == '\n' || *ptr == 0) 2269 continue; 2270 2271 if (sscanf(ptr, "%i %s\n", id, path) != 2 && 2272 sscanf(ptr, "%i %s #", id, path) != 2) { 2273 strcpy(path, ptr); 2274 return -1; 2275 } 2276 2277 return 1; 2278 } 2279 2280 return 0; 2281 } 2282 2283 static bool bpf_pinning_reserved(uint32_t pinning) 2284 { 2285 switch (pinning) { 2286 case PIN_NONE: 2287 case PIN_OBJECT_NS: 2288 case PIN_GLOBAL_NS: 2289 return true; 2290 default: 2291 return false; 2292 } 2293 } 2294 2295 static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file) 2296 { 2297 struct bpf_hash_entry *entry; 2298 char subpath[PATH_MAX] = {}; 2299 uint32_t pinning; 2300 FILE *fp; 2301 int ret; 2302 2303 fp = fopen(db_file, "r"); 2304 if (!fp) 2305 return; 2306 2307 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) { 2308 if (ret == -1) { 2309 fprintf(stderr, "Database %s is corrupted at: %s\n", 2310 db_file, subpath); 2311 fclose(fp); 2312 return; 2313 } 2314 2315 if (bpf_pinning_reserved(pinning)) { 2316 fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n", 2317 db_file, pinning); 2318 continue; 2319 } 2320 2321 entry = malloc(sizeof(*entry)); 2322 if (!entry) { 2323 fprintf(stderr, "No memory left for db entry!\n"); 2324 continue; 2325 } 2326 2327 entry->pinning = pinning; 2328 entry->subpath = strdup(subpath); 2329 if (!entry->subpath) { 2330 fprintf(stderr, "No memory left for db entry!\n"); 2331 free(entry); 2332 continue; 2333 } 2334 2335 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; 2336 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry; 2337 } 2338 2339 fclose(fp); 2340 } 2341 2342 static void bpf_hash_destroy(struct bpf_elf_ctx *ctx) 2343 { 2344 struct bpf_hash_entry *entry; 2345 int i; 2346 2347 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) { 2348 while ((entry = ctx->ht[i]) != NULL) { 2349 ctx->ht[i] = entry->next; 2350 free((char *)entry->subpath); 2351 free(entry); 2352 } 2353 } 2354 } 2355 2356 static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx) 2357 { 2358 if (ctx->elf_hdr.e_type != ET_REL || 2359 (ctx->elf_hdr.e_machine != EM_NONE && 2360 ctx->elf_hdr.e_machine != EM_BPF) || 2361 ctx->elf_hdr.e_version != EV_CURRENT) { 2362 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n"); 2363 return -EINVAL; 2364 } 2365 2366 switch (ctx->elf_hdr.e_ident[EI_DATA]) { 2367 default: 2368 fprintf(stderr, "ELF format error, wrong endianness info?\n"); 2369 return -EINVAL; 2370 case ELFDATA2LSB: 2371 if (htons(1) == 1) { 2372 fprintf(stderr, 2373 "We are big endian, eBPF object is little endian!\n"); 2374 return -EIO; 2375 } 2376 break; 2377 case ELFDATA2MSB: 2378 if (htons(1) != 1) { 2379 fprintf(stderr, 2380 "We are little endian, eBPF object is big endian!\n"); 2381 return -EIO; 2382 } 2383 break; 

static void bpf_get_cfg(struct bpf_elf_ctx *ctx)
{
	static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable";
	int fd;

	fd = open(path_jit, O_RDONLY);
	/* 0 is a valid descriptor, so test for >= 0 here. */
	if (fd >= 0) {
		char tmp[16] = {};

		if (read(fd, tmp, sizeof(tmp)) > 0)
			ctx->cfg.jit_enabled = atoi(tmp);
		close(fd);
	}
}

static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
			    enum bpf_prog_type type, bool verbose)
{
	int ret = -EINVAL;

	if (elf_version(EV_CURRENT) == EV_NONE ||
	    bpf_init_env(pathname))
		return ret;

	memset(ctx, 0, sizeof(*ctx));
	bpf_get_cfg(ctx);
	ctx->verbose = verbose;
	ctx->type = type;

	ctx->obj_fd = open(pathname, O_RDONLY);
	if (ctx->obj_fd < 0)
		return ctx->obj_fd;

	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
	if (!ctx->elf_fd) {
		ret = -EINVAL;
		goto out_fd;
	}

	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
		ret = -EINVAL;
		/* Release the Elf handle as well, not just the fd. */
		goto out_elf;
	}

	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
	    &ctx->elf_hdr) {
		ret = -EIO;
		goto out_elf;
	}

	ret = bpf_elf_check_ehdr(ctx);
	if (ret < 0)
		goto out_elf;

	ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
			       sizeof(*(ctx->sec_done)));
	if (!ctx->sec_done) {
		ret = -ENOMEM;
		goto out_elf;
	}

	if (ctx->verbose && bpf_log_realloc(ctx)) {
		ret = -ENOMEM;
		goto out_free;
	}

	bpf_save_finfo(ctx);
	bpf_hash_init(ctx, CONFDIR "/bpf_pinning");

	return 0;
out_free:
	free(ctx->sec_done);
out_elf:
	elf_end(ctx->elf_fd);
out_fd:
	close(ctx->obj_fd);
	return ret;
}

static int bpf_maps_count(struct bpf_elf_ctx *ctx)
{
	int i, count = 0;

	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
		if (!ctx->map_fds[i])
			break;
		count++;
	}

	return count;
}

static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
		if (ctx->map_fds[i])
			close(ctx->map_fds[i]);
	}
}

static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
{
	if (failure)
		bpf_maps_teardown(ctx);

	bpf_hash_destroy(ctx);

	free(ctx->sec_done);
	free(ctx->log);

	elf_end(ctx->elf_fd);
	close(ctx->obj_fd);
}

static struct bpf_elf_ctx __ctx;

static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
			const char *section, bool verbose)
{
	struct bpf_elf_ctx *ctx = &__ctx;
	int fd = 0, ret;

	ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
	if (ret < 0) {
		fprintf(stderr, "Cannot initialize ELF context!\n");
		return ret;
	}

	ret = bpf_fetch_ancillary(ctx);
	if (ret < 0) {
		fprintf(stderr, "Error fetching ELF ancillary data!\n");
		goto out;
	}

	fd = bpf_fetch_prog_sec(ctx, section);
	if (fd < 0) {
		fprintf(stderr, "Error fetching program/map!\n");
		ret = fd;
		goto out;
	}

	ret = bpf_fill_prog_arrays(ctx);
	if (ret < 0)
		fprintf(stderr, "Error filling program arrays!\n");
out:
	bpf_elf_ctx_destroy(ctx, ret < 0);
	if (ret < 0) {
		if (fd)
			close(fd);
		return ret;
	}

	return fd;
}
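
/*
 * Usage sketch, hypothetical caller: loading the classifier section of
 * an object file through the entry point above could look like
 *
 *	int fd = bpf_obj_open("/path/to/prog.o", BPF_PROG_TYPE_SCHED_CLS,
 *			      ELF_SECTION_CLASSIFIER, true);
 *	if (fd < 0)
 *		return fd;
 *
 * where the returned fd references the program loaded into the kernel.
 */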

static int
bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
		 const struct bpf_map_data *aux, unsigned int entries)
{
	struct bpf_map_set_msg msg = {
		.aux.uds_ver = BPF_SCM_AUX_VER,
		.aux.num_ent = entries,
	};
	int *cmsg_buf, min_fd;
	char *amsg_buf;
	int i;

	/* msg is zero-initialized, so copying size - 1 keeps termination. */
	strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name) - 1);
	memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));

	cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
	amsg_buf = (char *)msg.aux.ent;

	/* Push the fds out in chunks of at most BPF_SCM_MAX_FDS. */
	for (i = 0; i < entries; i += min_fd) {
		int ret;

		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
		bpf_map_set_init_single(&msg, min_fd);

		memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
		memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);

		ret = sendmsg(fd, &msg.hdr, 0);
		if (ret <= 0)
			return ret ? : -1;
	}

	return 0;
}

static int
bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
		 unsigned int entries)
{
	struct bpf_map_set_msg msg;
	int *cmsg_buf, min_fd;
	char *amsg_buf, *mmsg_buf;
	unsigned int needed = 1;
	int i;

	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
	amsg_buf = (char *)msg.aux.ent;
	mmsg_buf = (char *)&msg.aux;

	/* The first datagram carries the aux header announcing via
	 * aux->num_ent how many entries follow in total.
	 */
	for (i = 0; i < min(entries, needed); i += min_fd) {
		struct cmsghdr *cmsg;
		int ret;

		min_fd = min(entries, entries - i);
		bpf_map_set_init_single(&msg, min_fd);

		ret = recvmsg(fd, &msg.hdr, 0);
		if (ret <= 0)
			return ret ? : -1;

		cmsg = CMSG_FIRSTHDR(&msg.hdr);
		if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
			return -EINVAL;
		if (msg.hdr.msg_flags & MSG_CTRUNC)
			return -EIO;
		if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
			return -ENOSYS;

		min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
		if (min_fd > entries || min_fd <= 0)
			return -EINVAL;

		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));

		needed = aux->num_ent;
	}

	return 0;
}

int bpf_send_map_fds(const char *path, const char *obj)
{
	struct bpf_elf_ctx *ctx = &__ctx;
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	struct bpf_map_data bpf_aux = {
		.fds = ctx->map_fds,
		.ent = ctx->maps,
		.st = &ctx->stat,
		.obj = obj,
	};
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	/* addr is zero-initialized, so copying size - 1 keeps termination. */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot connect to %s: %s\n",
			path, strerror(errno));
		close(fd);
		return -1;
	}

	ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
			       bpf_maps_count(ctx));
	if (ret < 0)
		fprintf(stderr, "Cannot send fds to %s: %s\n",
			path, strerror(errno));

	bpf_maps_teardown(ctx);
	close(fd);
	return ret;
}
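
/*
 * Usage sketch, hypothetical consumer: bpf_recv_map_fds() below collects
 * the descriptors that bpf_map_set_send() pushes out in BPF_SCM_MAX_FDS
 * sized batches. For example ("/tmp/bpf-uds" is an arbitrary example
 * path for the unix socket):
 *
 *	int fds[64];
 *	struct bpf_map_aux aux = {};
 *
 *	if (bpf_recv_map_fds("/tmp/bpf-uds", fds, &aux, 64) == 0)
 *		... aux.num_ent entries of fds[] are now valid ...
 */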

int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	/* addr is zero-initialized, so copying size - 1 keeps termination. */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
#endif /* HAVE_ELF */