bpf: introduce bpf subcommand BPF_TASK_FD_QUERY
author Yonghong Song <yhs@fb.com>
Thu, 24 May 2018 18:21:09 +0000 (11:21 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 25 May 2018 01:18:19 +0000 (18:18 -0700)
Suppose a userspace application has loaded a bpf program and attached
it to a tracepoint/kprobe/uprobe, and a bpf introspection tool, e.g.,
bpftool, wants to show which bpf program is attached to which
tracepoint/kprobe/uprobe. Such attachment information is very useful
for understanding the overall bpf deployment in the system.

Each program has a 16-byte name field, which could be used to encode
the attachment point. This approach has several drawbacks, however.
First, a bpftool user (e.g., an admin) may not really understand the
association between the name and the attachment point. Second, if one
program is attached to multiple places, it is difficult to encode a
single name that conveys all these attachments.

This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY.
Given a pid and fd, if the <pid, fd> is associated with a
tracepoint/kprobe/uprobe perf event, BPF_TASK_FD_QUERY will return
   . prog_id
   . tracepoint name, or
   . k[ret]probe funcname + offset or kernel addr, or
   . u[ret]probe filename + offset
to userspace.
The user can then use "bpftool prog" with the prog_id to find more
information about the bpf program itself.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/trace_events.h
include/uapi/linux/bpf.h
kernel/bpf/syscall.c
kernel/trace/bpf_trace.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_uprobe.c

include/linux/trace_events.h
index 2bde3eff564cdde138c8d22da4d4d8d125e2ed3d..d34144a3c5b5e3a9bb732a00979be64c670a4d42 100644 (file)
@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+                           u32 *fd_type, const char **buf,
+                           u64 *probe_offset, u64 *probe_addr);
 #else
 static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
 {
        return NULL;
 }
+static inline int bpf_get_perf_event_info(const struct perf_event *event,
+                                         u32 *prog_id, u32 *fd_type,
+                                         const char **buf, u64 *probe_offset,
+                                         u64 *probe_addr)
+{
+       return -EOPNOTSUPP;
+}
 #endif
 
 enum {
@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
 #ifdef CONFIG_KPROBE_EVENTS
 extern int  perf_kprobe_init(struct perf_event *event, bool is_retprobe);
 extern void perf_kprobe_destroy(struct perf_event *event);
+extern int bpf_get_kprobe_info(const struct perf_event *event,
+                              u32 *fd_type, const char **symbol,
+                              u64 *probe_offset, u64 *probe_addr,
+                              bool perf_type_tracepoint);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 extern int  perf_uprobe_init(struct perf_event *event, bool is_retprobe);
 extern void perf_uprobe_destroy(struct perf_event *event);
+extern int bpf_get_uprobe_info(const struct perf_event *event,
+                              u32 *fd_type, const char **filename,
+                              u64 *probe_offset, bool perf_type_tracepoint);
 #endif
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
                                     char *filter_str);
include/uapi/linux/bpf.h
index e95fec90c2c199fb909201f13c8ea2aa8886dd19..9b8c6e310e9a9666aefec27c74451954711d6bde 100644 (file)
@@ -97,6 +97,7 @@ enum bpf_cmd {
        BPF_RAW_TRACEPOINT_OPEN,
        BPF_BTF_LOAD,
        BPF_BTF_GET_FD_BY_ID,
+       BPF_TASK_FD_QUERY,
 };
 
 enum bpf_map_type {
@@ -380,6 +381,22 @@ union bpf_attr {
                __u32           btf_log_size;
                __u32           btf_log_level;
        };
+
+       struct {
+               __u32           pid;            /* input: pid */
+               __u32           fd;             /* input: fd */
+               __u32           flags;          /* input: flags */
+               __u32           buf_len;        /* input/output: buf len */
+               __aligned_u64   buf;            /* input/output:
+                                                *   tp_name for tracepoint
+                                                *   symbol for kprobe
+                                                *   filename for uprobe
+                                                */
+               __u32           prog_id;        /* output: prog_id */
+               __u32           fd_type;        /* output: BPF_FD_TYPE_* */
+               __u64           probe_offset;   /* output: probe_offset */
+               __u64           probe_addr;     /* output: probe_addr */
+       } task_fd_query;
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
@@ -2557,4 +2574,13 @@ struct bpf_fib_lookup {
        __u8    dmac[6];     /* ETH_ALEN */
 };
 
+enum bpf_task_fd_type {
+       BPF_FD_TYPE_RAW_TRACEPOINT,     /* tp name */
+       BPF_FD_TYPE_TRACEPOINT,         /* tp name */
+       BPF_FD_TYPE_KPROBE,             /* (symbol + offset) or addr */
+       BPF_FD_TYPE_KRETPROBE,          /* (symbol + offset) or addr */
+       BPF_FD_TYPE_UPROBE,             /* filename + offset */
+       BPF_FD_TYPE_URETPROBE,          /* filename + offset */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
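
For reference, a hypothetical helper (illustration only, not part of
this patch) showing how a tool could turn the returned fd_type into a
printable label:

  static const char *fd_type_str(__u32 fd_type)
  {
          switch (fd_type) {
          case BPF_FD_TYPE_RAW_TRACEPOINT:
                  return "raw_tracepoint";
          case BPF_FD_TYPE_TRACEPOINT:
                  return "tracepoint";
          case BPF_FD_TYPE_KPROBE:
                  return "kprobe";
          case BPF_FD_TYPE_KRETPROBE:
                  return "kretprobe";
          case BPF_FD_TYPE_UPROBE:
                  return "uprobe";
          case BPF_FD_TYPE_URETPROBE:
                  return "uretprobe";
          default:
                  return "unknown";
          }
  }
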
kernel/bpf/syscall.c
index 788456c186178acf0a46c545eb99268d73f5b916..388d4feda3486d8a86cd6b687f502716844c1929 100644 (file)
@@ -18,7 +18,9 @@
 #include <linux/vmalloc.h>
 #include <linux/mmzone.h>
 #include <linux/anon_inodes.h>
+#include <linux/fdtable.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include <linux/license.h>
 #include <linux/filter.h>
 #include <linux/version.h>
@@ -2178,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
        return btf_get_fd_by_id(attr->btf_id);
 }
 
+static int bpf_task_fd_query_copy(const union bpf_attr *attr,
+                                   union bpf_attr __user *uattr,
+                                   u32 prog_id, u32 fd_type,
+                                   const char *buf, u64 probe_offset,
+                                   u64 probe_addr)
+{
+       char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
+       u32 len = buf ? strlen(buf) : 0, input_len;
+       int err = 0;
+
+       if (put_user(len, &uattr->task_fd_query.buf_len))
+               return -EFAULT;
+       input_len = attr->task_fd_query.buf_len;
+       if (input_len && ubuf) {
+               if (!len) {
+                       /* nothing to copy, just make ubuf NULL terminated */
+                       char zero = '\0';
+
+                       if (put_user(zero, ubuf))
+                               return -EFAULT;
+               } else if (input_len >= len + 1) {
+                       /* ubuf can hold the string with NULL terminator */
+                       if (copy_to_user(ubuf, buf, len + 1))
+                               return -EFAULT;
+               } else {
+                       /* ubuf cannot hold the string with NULL terminator,
+                        * do a partial copy with NULL terminator.
+                        */
+                       char zero = '\0';
+
+                       err = -ENOSPC;
+                       if (copy_to_user(ubuf, buf, input_len - 1))
+                               return -EFAULT;
+                       if (put_user(zero, ubuf + input_len - 1))
+                               return -EFAULT;
+               }
+       }
+
+       if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
+           put_user(fd_type, &uattr->task_fd_query.fd_type) ||
+           put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
+           put_user(probe_addr, &uattr->task_fd_query.probe_addr))
+               return -EFAULT;
+
+       return err;
+}
+
+#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
+
+static int bpf_task_fd_query(const union bpf_attr *attr,
+                            union bpf_attr __user *uattr)
+{
+       pid_t pid = attr->task_fd_query.pid;
+       u32 fd = attr->task_fd_query.fd;
+       const struct perf_event *event;
+       struct files_struct *files;
+       struct task_struct *task;
+       struct file *file;
+       int err;
+
+       if (CHECK_ATTR(BPF_TASK_FD_QUERY))
+               return -EINVAL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (attr->task_fd_query.flags != 0)
+               return -EINVAL;
+
+       task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+       if (!task)
+               return -ENOENT;
+
+       files = get_files_struct(task);
+       put_task_struct(task);
+       if (!files)
+               return -ENOENT;
+
+       err = 0;
+       spin_lock(&files->file_lock);
+       file = fcheck_files(files, fd);
+       if (!file)
+               err = -EBADF;
+       else
+               get_file(file);
+       spin_unlock(&files->file_lock);
+       put_files_struct(files);
+
+       if (err)
+               goto out;
+
+       if (file->f_op == &bpf_raw_tp_fops) {
+               struct bpf_raw_tracepoint *raw_tp = file->private_data;
+               struct bpf_raw_event_map *btp = raw_tp->btp;
+
+               err = bpf_task_fd_query_copy(attr, uattr,
+                                            raw_tp->prog->aux->id,
+                                            BPF_FD_TYPE_RAW_TRACEPOINT,
+                                            btp->tp->name, 0, 0);
+               goto put_file;
+       }
+
+       event = perf_get_event(file);
+       if (!IS_ERR(event)) {
+               u64 probe_offset, probe_addr;
+               u32 prog_id, fd_type;
+               const char *buf;
+
+               err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
+                                             &buf, &probe_offset,
+                                             &probe_addr);
+               if (!err)
+                       err = bpf_task_fd_query_copy(attr, uattr, prog_id,
+                                                    fd_type, buf,
+                                                    probe_offset,
+                                                    probe_addr);
+               goto put_file;
+       }
+
+       err = -ENOTSUPP;
+put_file:
+       fput(file);
+out:
+       return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -2264,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_BTF_GET_FD_BY_ID:
                err = bpf_btf_get_fd_by_id(&attr);
                break;
+       case BPF_TASK_FD_QUERY:
+               err = bpf_task_fd_query(&attr, uattr);
+               break;
        default:
                err = -EINVAL;
                break;
kernel/trace/bpf_trace.c
index ce2cbbff27e45cb7a3cbd52470faa2bb78c18ed9..81fdf2fc94ac9a954702f5d312a415a33972a012 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/kprobes.h>
+#include <linux/syscalls.h>
 #include <linux/error-injection.h>
 
 #include "trace_probe.h"
@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
        mutex_unlock(&bpf_event_mutex);
        return err;
 }
+
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+                           u32 *fd_type, const char **buf,
+                           u64 *probe_offset, u64 *probe_addr)
+{
+       bool is_tracepoint, is_syscall_tp;
+       struct bpf_prog *prog;
+       int flags, err = 0;
+
+       prog = event->prog;
+       if (!prog)
+               return -ENOENT;
+
+       /* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
+       if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
+               return -EOPNOTSUPP;
+
+       *prog_id = prog->aux->id;
+       flags = event->tp_event->flags;
+       is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
+       is_syscall_tp = is_syscall_trace_event(event->tp_event);
+
+       if (is_tracepoint || is_syscall_tp) {
+               *buf = is_tracepoint ? event->tp_event->tp->name
+                                    : event->tp_event->name;
+               *fd_type = BPF_FD_TYPE_TRACEPOINT;
+               *probe_offset = 0x0;
+               *probe_addr = 0x0;
+       } else {
+               /* kprobe/uprobe */
+               err = -EOPNOTSUPP;
+#ifdef CONFIG_KPROBE_EVENTS
+               if (flags & TRACE_EVENT_FL_KPROBE)
+                       err = bpf_get_kprobe_info(event, fd_type, buf,
+                                                 probe_offset, probe_addr,
+                                                 event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+#ifdef CONFIG_UPROBE_EVENTS
+               if (flags & TRACE_EVENT_FL_UPROBE)
+                       err = bpf_get_uprobe_info(event, fd_type, buf,
+                                                 probe_offset,
+                                                 event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+       }
+
+       return err;
+}
kernel/trace/trace_kprobe.c
index 02aed76e0978801428ff33f1b8a421a8deb34f14..daa81571b22a4646bcc6400ccee0fe638dda2515 100644 (file)
@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
                              head, NULL);
 }
 NOKPROBE_SYMBOL(kretprobe_perf_func);
+
+int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
+                       const char **symbol, u64 *probe_offset,
+                       u64 *probe_addr, bool perf_type_tracepoint)
+{
+       const char *pevent = trace_event_name(event->tp_event);
+       const char *group = event->tp_event->class->system;
+       struct trace_kprobe *tk;
+
+       if (perf_type_tracepoint)
+               tk = find_trace_kprobe(pevent, group);
+       else
+               tk = event->tp_event->data;
+       if (!tk)
+               return -EINVAL;
+
+       *fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
+                                             : BPF_FD_TYPE_KPROBE;
+       if (tk->symbol) {
+               *symbol = tk->symbol;
+               *probe_offset = tk->rp.kp.offset;
+               *probe_addr = 0;
+       } else {
+               *symbol = NULL;
+               *probe_offset = 0;
+               *probe_addr = (unsigned long)tk->rp.kp.addr;
+       }
+       return 0;
+}
 #endif /* CONFIG_PERF_EVENTS */
 
 /*
kernel/trace/trace_uprobe.c
index ac892878dbe60d8f4ac527be8fc1f8d39a2958b9..bf89a51e740dd402dd82e066083b9462f48bafad 100644 (file)
@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
 {
        __uprobe_perf_func(tu, func, regs, ucb, dsize);
 }
+
+int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+                       const char **filename, u64 *probe_offset,
+                       bool perf_type_tracepoint)
+{
+       const char *pevent = trace_event_name(event->tp_event);
+       const char *group = event->tp_event->class->system;
+       struct trace_uprobe *tu;
+
+       if (perf_type_tracepoint)
+               tu = find_probe_event(pevent, group);
+       else
+               tu = event->tp_event->data;
+       if (!tu)
+               return -EINVAL;
+
+       *fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
+                                   : BPF_FD_TYPE_UPROBE;
+       *filename = tu->filename;
+       *probe_offset = tu->offset;
+       return 0;
+}
 #endif /* CONFIG_PERF_EVENTS */
 
 static int