From 2993e0515bb44e157c17c9ba7309ba46366b6add Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:16 -0800 Subject: [PATCH] tools/bpf: add support to read .BTF.ext sections The .BTF section is already available to encode types. These types can be used for map pretty print. The whole .BTF will be passed to the kernel as well for which kernel can verify and return to the user space for pretty print etc. The llvm patch at https://reviews.llvm.org/D53736 will generate .BTF section and one more section .BTF.ext. The .BTF.ext section encodes function type information and line information. Note that this patch set only supports function type info. The functionality is implemented in libbpf. The .BTF section can be directly loaded into the kernel, and the .BTF.ext section cannot. The loader may need to do some relocation and merging, similar to merging multiple code sections, before loading into the kernel. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 46 ++++++- tools/lib/bpf/btf.c | 274 +++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/btf.h | 50 ++++++++ tools/lib/bpf/libbpf.c | 87 ++++++++++--- 4 files changed, 442 insertions(+), 15 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9b5cf22c4e64..836447bb4f14 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -186,6 +186,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz) { union bpf_attr attr; + void *finfo = NULL; __u32 name_len; int fd; @@ -216,12 +217,55 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, if (fd >= 0 || !log_buf || !log_buf_sz) return fd; + /* After bpf_prog_load, the kernel may modify certain attributes + * to give user space a hint how to deal with loading failure. + * Check to see whether we can make some changes and load again. + */ + if (errno == E2BIG && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + __u32 actual_rec_size = load_attr->func_info_rec_size; + __u32 expected_rec_size = attr.func_info_rec_size; + __u32 finfo_cnt = load_attr->func_info_cnt; + __u64 finfo_len = actual_rec_size * finfo_cnt; + const void *orecord; + void *nrecord; + int i; + + finfo = malloc(finfo_len); + if (!finfo) + /* further try with log buffer won't help */ + return fd; + + /* zero out bytes kernel does not understand */ + orecord = load_attr->func_info; + nrecord = finfo; + for (i = 0; i < load_attr->func_info_cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + /* try with corrected func info records */ + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + + if (fd >= 0 || !log_buf || !log_buf_sz) + goto done; + } + /* Try again with log */ attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +done: + free(finfo); + return fd; } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 31225e64766f..fe87cb48a6a9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -37,6 +37,18 @@ struct btf { int fd; }; +struct btf_ext { + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_len; +}; + +/* The minimum bpf_func_info checked by the loader */ +struct bpf_func_info_min { + __u32 insn_offset; + __u32 type_id; +}; + static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { @@ -397,3 +409,265 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) else return NULL; } + +static int btf_ext_validate_func_info(const void *finfo, __u32 size, + btf_print_fn_t err_log) +{ + int sec_hdrlen = sizeof(struct btf_sec_func_info); + __u32 size_left, num_records, record_size; + const struct btf_sec_func_info *sinfo; + __u64 total_record_size; + + /* At least a func_info record size */ + if (size < sizeof(__u32)) { + elog("BTF.ext func_info record size not found"); + return -EINVAL; + } + + /* The record size needs to meet below minimum standard */ + record_size = *(__u32 *)finfo; + if (record_size < sizeof(struct bpf_func_info_min) || + record_size % sizeof(__u32)) { + elog("BTF.ext func_info invalid record size"); + return -EINVAL; + } + + sinfo = finfo + sizeof(__u32); + size_left = size - sizeof(__u32); + + /* If no func_info records, return failure now so .BTF.ext + * won't be used. + */ + if (!size_left) { + elog("BTF.ext no func info records"); + return -EINVAL; + } + + while (size_left) { + if (size_left < sec_hdrlen) { + elog("BTF.ext func_info header not found"); + return -EINVAL; + } + + num_records = sinfo->num_func_info; + if (num_records == 0) { + elog("incorrect BTF.ext num_func_info"); + return -EINVAL; + } + + total_record_size = sec_hdrlen + + (__u64)num_records * record_size; + if (size_left < total_record_size) { + elog("incorrect BTF.ext num_func_info"); + return -EINVAL; + } + + size_left -= total_record_size; + sinfo = (void *)sinfo + total_record_size; + } + + return 0; +} + +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + __u32 meta_left, last_func_info_pos; + void *finfo; + + if (data_size < offsetof(struct btf_ext_header, func_info_off) || + data_size < hdr->hdr_len) { + elog("BTF.ext header not found"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + elog("Invalid BTF.ext magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + elog("Unsupported BTF.ext version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + elog("Unsupported BTF.ext flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + meta_left = data_size - hdr->hdr_len; + if (!meta_left) { + elog("BTF.ext has no data\n"); + return -EINVAL; + } + + if (meta_left < hdr->func_info_off) { + elog("Invalid BTF.ext func_info section offset:%u\n", + hdr->func_info_off); + return -EINVAL; + } + + if (hdr->func_info_off & 0x03) { + elog("BTF.ext func_info section is not aligned to 4 bytes\n"); + return -EINVAL; + } + + last_func_info_pos = hdr->hdr_len + hdr->func_info_off + + hdr->func_info_len; + if (last_func_info_pos > data_size) { + elog("Invalid BTF.ext func_info section size:%u\n", + hdr->func_info_len); + return -EINVAL; + } + + finfo = data + hdr->hdr_len + hdr->func_info_off; + return btf_ext_validate_func_info(finfo, hdr->func_info_len, + err_log); +} + +void btf_ext__free(struct btf_ext *btf_ext) +{ + if (!btf_ext) + return; + + free(btf_ext->func_info); + free(btf_ext); +} + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr; + struct btf_ext *btf_ext; + void *org_fdata, *fdata; + __u32 hdrlen, size_u32; + int err; + + err = btf_ext_parse_hdr(data, size, err_log); + if (err) + return ERR_PTR(err); + + btf_ext = calloc(1, sizeof(struct btf_ext)); + if (!btf_ext) + return ERR_PTR(-ENOMEM); + + hdr = (const struct btf_ext_header *)data; + hdrlen = hdr->hdr_len; + size_u32 = sizeof(__u32); + fdata = malloc(hdr->func_info_len - size_u32); + if (!fdata) { + free(btf_ext); + return ERR_PTR(-ENOMEM); + } + + /* remember record size and copy rest of func_info data */ + org_fdata = data + hdrlen + hdr->func_info_off; + btf_ext->func_info_rec_size = *(__u32 *)org_fdata; + memcpy(fdata, org_fdata + size_u32, hdr->func_info_len - size_u32); + btf_ext->func_info = fdata; + btf_ext->func_info_len = hdr->func_info_len - size_u32; + + return btf_ext; +} + +int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, void **func_info, + __u32 *func_info_rec_size, __u32 *func_info_len) +{ + __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); + __u32 i, record_size, records_len; + struct btf_sec_func_info *sinfo; + const char *info_sec_name; + __s64 remain_len; + void *data; + + record_size = btf_ext->func_info_rec_size; + sinfo = btf_ext->func_info; + remain_len = btf_ext->func_info_len; + + while (remain_len > 0) { + records_len = sinfo->num_func_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + data = malloc(records_len); + if (!data) + return -ENOMEM; + + memcpy(data, sinfo->data, records_len); + + /* adjust the insn_offset, the data in .BTF.ext is + * the actual byte offset, and the kernel expects + * the offset in term of bpf_insn. + * + * adjust the insn offset only, the rest data will + * be passed to kernel. + */ + for (i = 0; i < sinfo->num_func_info; i++) { + struct bpf_func_info_min *record; + + record = data + i * record_size; + record->insn_offset /= sizeof(struct bpf_insn); + } + + *func_info = data; + *func_info_len = records_len; + *func_info_rec_size = record_size; + return 0; + } + + return -EINVAL; +} + +int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *func_info_len) +{ + __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); + __u32 i, record_size, existing_flen, records_len; + struct btf_sec_func_info *sinfo; + const char *info_sec_name; + __u64 remain_len; + void *data; + + record_size = btf_ext->func_info_rec_size; + sinfo = btf_ext->func_info; + remain_len = btf_ext->func_info_len; + while (remain_len > 0) { + records_len = sinfo->num_func_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + existing_flen = *func_info_len; + data = realloc(*func_info, existing_flen + records_len); + if (!data) + return -ENOMEM; + + memcpy(data + existing_flen, sinfo->data, records_len); + /* adjust insn_offset only, the rest data will be passed + * to the kernel. + */ + for (i = 0; i < sinfo->num_func_info; i++) { + struct bpf_func_info_min *record; + + record = data + existing_flen + i * record_size; + record->insn_offset = + record->insn_offset / sizeof(struct bpf_insn) + + insns_cnt; + } + *func_info = data; + *func_info_len = existing_flen + records_len; + return 0; + } + + return -EINVAL; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b77e7080f7e7..578171e8cb26 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -11,10 +11,51 @@ #endif #define BTF_ELF_SEC ".BTF" +#define BTF_EXT_ELF_SEC ".BTF.ext" struct btf; +struct btf_ext; struct btf_type; +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. + */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; +}; + +struct btf_sec_func_info { + __u32 sec_name_off; + __u32 num_func_info; + /* Followed by num_func_info number of bpf func_info records */ + __u8 data[0]; +}; + typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); @@ -29,4 +70,13 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +void btf_ext__free(struct btf_ext *btf_ext); +int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, void **func_info, + __u32 *func_info_rec_size, __u32 *func_info_len); +int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, void **func_info, + __u32 *func_info_len); + #endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a01eb9584e52..cb6565d79603 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -156,6 +156,10 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; + int btf_fd; + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_len; }; struct bpf_map { @@ -212,6 +216,7 @@ struct bpf_object { struct list_head list; struct btf *btf; + struct btf_ext *btf_ext; void *priv; bpf_object_clear_priv_t clear_priv; @@ -241,6 +246,9 @@ void bpf_program__unload(struct bpf_program *prog) prog->instances.nr = -1; zfree(&prog->instances.fds); + + zclose(prog->btf_fd); + zfree(&prog->func_info); } static void bpf_program__exit(struct bpf_program *prog) @@ -315,6 +323,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_KPROBE; + prog->btf_fd = -1; return 0; errout: @@ -807,6 +816,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; } + } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + obj->btf_ext = btf_ext__new(data->d_buf, data->d_size, + __pr_debug); + if (IS_ERR(obj->btf_ext)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, + PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + } } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -1190,6 +1208,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct bpf_insn *insn, *new_insn; struct bpf_program *text; size_t new_cnt; + int err; if (relo->type != RELO_CALL) return -LIBBPF_ERRNO__RELOC; @@ -1212,6 +1231,20 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, pr_warning("oom in prog realloc\n"); return -ENOMEM; } + + if (obj->btf && obj->btf_ext) { + err = btf_ext__reloc(obj->btf, obj->btf_ext, + text->section_name, + prog->insns_cnt, + &prog->func_info, + &prog->func_info_len); + if (err) { + pr_warning("error in btf_ext__reloc for sec %s\n", + text->section_name); + return err; + } + } + memcpy(new_insn + prog->insns_cnt, text->insns, text->insns_cnt * sizeof(*insn)); prog->insns = new_insn; @@ -1231,7 +1264,24 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { int i, err; - if (!prog || !prog->reloc_desc) + if (!prog) + return 0; + + if (obj->btf && obj->btf_ext) { + err = btf_ext__reloc_init(obj->btf, obj->btf_ext, + prog->section_name, + &prog->func_info, + &prog->func_info_rec_size, + &prog->func_info_len); + if (err) { + pr_warning("err in btf_ext__reloc_init for sec %s\n", + prog->section_name); + return err; + } + prog->btf_fd = btf__fd(obj->btf); + } + + if (!prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { @@ -1319,9 +1369,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, - const char *name, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd, int prog_ifindex) +load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, + char *license, __u32 kern_version, int *pfd, + __u32 func_info_cnt) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1329,14 +1379,18 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, int ret; memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = expected_attach_type; - load_attr.name = name; + load_attr.prog_type = prog->type; + load_attr.expected_attach_type = prog->expected_attach_type; + load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog_ifindex; + load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.prog_btf_fd = prog->btf_fd; + load_attr.func_info = prog->func_info; + load_attr.func_info_rec_size = prog->func_info_rec_size; + load_attr.func_info_cnt = func_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1394,8 +1448,14 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version) { + __u32 func_info_cnt; int err = 0, fd, i; + if (prog->func_info_len == 0) + func_info_cnt = 0; + else + func_info_cnt = prog->func_info_len / prog->func_info_rec_size; + if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warning("Internal error: can't load program '%s'\n", @@ -1417,10 +1477,9 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, prog->insns, prog->insns_cnt, + err = load_program(prog, prog->insns, prog->insns_cnt, license, kern_version, &fd, - prog->prog_ifindex); + func_info_cnt); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1448,11 +1507,10 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, result.new_insn_ptr, + err = load_program(prog, result.new_insn_ptr, result.new_insn_cnt, license, kern_version, &fd, - prog->prog_ifindex); + func_info_cnt); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", @@ -2120,6 +2178,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object__unload(obj); btf__free(obj->btf); + btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); -- 2.30.2