bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type
author Alexei Starovoitov <ast@fb.com>
Fri, 2 Sep 2016 01:37:22 +0000 (18:37 -0700)
committer David S. Miller <davem@davemloft.net>
Fri, 2 Sep 2016 17:46:44 +0000 (10:46 -0700)
Introduce BPF_PROG_TYPE_PERF_EVENT programs that can be attached to
HW and SW perf events (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE,
respectively, in uapi/linux/perf_event.h).

The program-visible context meta structure is:
struct bpf_perf_event_data {
    struct pt_regs regs;
     __u64 sample_period;
};
which is accessible directly from the program:
int bpf_prog(struct bpf_perf_event_data *ctx)
{
  ... ctx->sample_period ...
  ... ctx->regs.ip ...
}

The bpf verifier rewrites these accesses into accesses to the
kernel-internal struct bpf_perf_event_data_kern, which allows changing
struct perf_sample_data without affecting bpf programs.
New fields can be added to the end of struct bpf_perf_event_data
in the future.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/perf_event.h
include/uapi/linux/Kbuild
include/uapi/linux/bpf.h
include/uapi/linux/bpf_perf_event.h [new file with mode: 0644]
kernel/trace/bpf_trace.c

index 2b6b43cc0dd5121d8d4f6024f8ec67f862ff3328..97bfe62f30d7b821bc45a000c585877925c2b964 100644 (file)
@@ -788,6 +788,11 @@ struct perf_output_handle {
        int                             page;
 };
 
+struct bpf_perf_event_data_kern {      /* kernel-internal counterpart of struct bpf_perf_event_data */
+       struct pt_regs *regs;           /* rewritten ctx->regs.* loads go through this pointer */
+       struct perf_sample_data *data;  /* rewritten ctx->sample_period loads read data->period */
+};
+
 #ifdef CONFIG_CGROUP_PERF
 
 /*
index 185f8ea2702fc6b85335647d7076115c694cdfba..d0352a971ebd6e574b303c4f43af4d17cab0c421 100644 (file)
@@ -71,6 +71,7 @@ header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
 header-y += bpf_common.h
+header-y += bpf_perf_event.h
 header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
index e4c5a1baa9933e8e2490e29e18237c232998d164..f896dfac4ac0e90bc6c0c5ca1667c8b469317bc2 100644 (file)
@@ -95,6 +95,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_SCHED_ACT,
        BPF_PROG_TYPE_TRACEPOINT,
        BPF_PROG_TYPE_XDP,
+       BPF_PROG_TYPE_PERF_EVENT,
 };
 
 #define BPF_PSEUDO_MAP_FD      1
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
new file mode 100644 (file)
index 0000000..0674272
--- /dev/null
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {   /* context layout seen by BPF_PROG_TYPE_PERF_EVENT programs */
+       struct pt_regs regs;    /* accessed from the program as ctx->regs.<field> */
+       __u64 sample_period;    /* accessed from the program as ctx->sample_period */
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
index ad35213b840512ad962d3e710d95cb2cb1330788..d3869b03d9fe4850ca2be5010812717460d9c132 100644 (file)
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
@@ -552,10 +554,69 @@ static struct bpf_prog_type_list tracepoint_tl = {
        .type   = BPF_PROG_TYPE_TRACEPOINT,
 };
 
+/*
+ * Verifier hook: decide whether a BPF_PROG_TYPE_PERF_EVENT program may access
+ * @size bytes at byte offset @off of its struct bpf_perf_event_data context.
+ *
+ * Only aligned reads are allowed: sample_period must be read as one u64 at
+ * its exact offset, and regs must be read in sizeof(long) units that lie
+ * entirely inside struct pt_regs.  @reg_type is not used.
+ *
+ * Returns true if the access is allowed, false otherwise.
+ */
+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+                                   enum bpf_reg_type *reg_type)
+{
+       const int period_off = offsetof(struct bpf_perf_event_data, sample_period);
+
+       if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+               return false;
+       if (type != BPF_READ)
+               return false;
+       /* size <= 0 is never a valid access and would make the modulo undefined */
+       if (size <= 0 || off % size != 0)
+               return false;
+       /*
+        * Anything at or past sample_period must be the full u64 load.  Without
+        * this, a sizeof(long) read of the upper half of sample_period (possible
+        * when sizeof(long) == 4) would be treated as a regs access, and the
+        * rewritten load at regs + off would run past the end of struct pt_regs.
+        */
+       if (off >= period_off)
+               return off == period_off && size == sizeof(u64);
+       if (size != sizeof(long))
+               return false;
+       /*
+        * regs is the first member, so off is also the offset into pt_regs;
+        * reject loads that would land in padding after it.
+        */
+       return off + size <= sizeof(struct pt_regs);
+}
+
+/*
+ * Rewrite one load from the program-visible struct bpf_perf_event_data into
+ * loads through struct bpf_perf_event_data_kern.  Each access becomes two
+ * instructions written to @insn_buf: first fetch the kernel pointer (data or
+ * regs) from the context in @src_reg into @dst_reg, then load the requested
+ * value through @dst_reg.  @type and @prog are not used.
+ *
+ * Returns the number of instructions emitted.
+ */
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+                                     int src_reg, int ctx_off,
+                                     struct bpf_insn *insn_buf,
+                                     struct bpf_prog *prog)
+{
+       u32 cnt = 0;
+
+       if (ctx_off == offsetof(struct bpf_perf_event_data, sample_period)) {
+               BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
+               /* dst_reg = kernel ctx->data */
+               insn_buf[cnt++] = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, data)),
+                                             dst_reg, src_reg,
+                                             offsetof(struct bpf_perf_event_data_kern, data));
+               /* dst_reg = data->period */
+               insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+                                             offsetof(struct perf_sample_data, period));
+       } else {
+               /* any other offset is used as-is as an offset into pt_regs */
+               insn_buf[cnt++] = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, regs)),
+                                             dst_reg, src_reg,
+                                             offsetof(struct bpf_perf_event_data_kern, regs));
+               insn_buf[cnt++] = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)),
+                                             dst_reg, dst_reg, ctx_off);
+       }
+
+       return cnt;
+}
+
+static const struct bpf_verifier_ops perf_event_prog_ops = {   /* verifier callbacks for BPF_PROG_TYPE_PERF_EVENT */
+       .get_func_proto         = tp_prog_func_proto,   /* defined outside this hunk; presumably the tracepoint helper lookup - confirm */
+       .is_valid_access        = pe_prog_is_valid_access,      /* validates reads of struct bpf_perf_event_data */
+       .convert_ctx_access     = pe_prog_convert_ctx_access,   /* redirects them to struct bpf_perf_event_data_kern */
+};
+
+static struct bpf_prog_type_list perf_event_tl = {     /* entry passed to bpf_register_prog_type() at init */
+       .ops    = &perf_event_prog_ops, /* verifier callbacks used for this program type */
+       .type   = BPF_PROG_TYPE_PERF_EVENT,
+};
+
 static int __init register_kprobe_prog_ops(void)
 {
        bpf_register_prog_type(&kprobe_tl);
        bpf_register_prog_type(&tracepoint_tl);
+       bpf_register_prog_type(&perf_event_tl); /* registers the ops for BPF_PROG_TYPE_PERF_EVENT */
        return 0;
 }
 late_initcall(register_kprobe_prog_ops);