bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS

author Song Liu <songliubraving@fb.com>

Thu, 30 Apr 2020 07:15:04 +0000 (00:15 -0700)

committer Alexei Starovoitov <ast@kernel.org>

Fri, 1 May 2020 17:36:32 +0000 (10:36 -0700)
author Song Liu <songliubraving@fb.com>
Thu, 30 Apr 2020 07:15:04 +0000 (00:15 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 1 May 2020 17:36:32 +0000 (10:36 -0700)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index c07b1d2f38245e0b72aa4a200584abb988103670..1262ec460ab3565cdeed8b7162cd8d97177a97b4 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -987,6 +987,7 @@ _out:                                                       \
  
  #ifdef CONFIG_BPF_SYSCALL
  DECLARE_PER_CPU(int, bpf_prog_active);
+extern struct mutex bpf_stats_enabled_mutex;
  
  /*
   * Block execution of BPF programs attached to instrumentation (perf,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 0eccafae55bbc1b589a22e6308cbc4557d04f74a..705e4822f997865b96556048f2325b26e149ae3d 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
         BPF_LINK_UPDATE,
         BPF_LINK_GET_FD_BY_ID,
         BPF_LINK_GET_NEXT_ID,
+       BPF_ENABLE_STATS,
  };
  
  enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
   */
  #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
  
+/* stats type passed in bpf_attr.enable_stats.type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+       /* enable run_time_ns and run_cnt */
+       BPF_STATS_RUN_TIME = 0,
+};
+
  enum bpf_stack_build_id_status {
         /* user space need an empty entry to identify end of a trace */
         BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
                 __u32           old_prog_fd;
         } link_update;
  
+       struct { /* struct used by BPF_ENABLE_STATS command */
+               __u32           type;
+       } enable_stats;
+
  } __attribute__((aligned(8)));
  
  /* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index c75b2dd2459c886c7a6b88a015fabd4b0546ade4..4f34eecec9cef3b02dd622927e5d51fe78512076 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
         return fd;
  }
  
+/*
+ * Serializes updates to bpf_stats_enabled_key made through BPF_ENABLE_STATS
+ * file descriptors (this file) and through bpf_stats_handler() in
+ * kernel/sysctl.c.
+ */
+DEFINE_MUTEX(bpf_stats_enabled_mutex);
+
+/*
+ * ->release() for the "bpf-stats" anonymous file: drop the reference on
+ * bpf_stats_enabled_key that bpf_enable_runtime_stats() took when it
+ * created the fd. Always returns 0.
+ */
+static int bpf_stats_release(struct inode *inode, struct file *file)
+{
+       mutex_lock(&bpf_stats_enabled_mutex);
+       static_key_slow_dec(&bpf_stats_enabled_key.key);
+       mutex_unlock(&bpf_stats_enabled_mutex);
+       return 0;
+}
+
+/* File operations for the "bpf-stats" anon inode; only ->release is set. */
+static const struct file_operations bpf_stats_fops = {
+       .release = bpf_stats_release,
+};
+
+/*
+ * Hand out a file descriptor that holds one reference on
+ * bpf_stats_enabled_key until its file is released.
+ *
+ * Creates an O_CLOEXEC anonymous "bpf-stats" fd and, if that succeeds,
+ * increments the static key; bpf_stats_release() performs the matching
+ * decrement.
+ *
+ * Returns the new fd, -EBUSY if the key count is already above INT_MAX / 2,
+ * or the negative value returned by anon_inode_getfd().
+ */
+static int bpf_enable_runtime_stats(void)
+{
+       int fd;
+
+       mutex_lock(&bpf_stats_enabled_mutex);
+
+       /* Set a very high limit to avoid overflow */
+       if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
+               mutex_unlock(&bpf_stats_enabled_mutex);
+               return -EBUSY;
+       }
+
+       fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
+       /* Take the key reference only when there is an fd to drop it later. */
+       if (fd >= 0)
+               static_key_slow_inc(&bpf_stats_enabled_key.key);
+
+       mutex_unlock(&bpf_stats_enabled_mutex);
+       return fd;
+}
+
+/* Last bpf_attr field consumed by BPF_ENABLE_STATS; used by CHECK_ATTR(). */
+#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
+
+/*
+ * BPF_ENABLE_STATS command handler.
+ *
+ * Returns -EINVAL when CHECK_ATTR() rejects the attribute or when
+ * enable_stats.type is not a known stats type, -EPERM when the caller
+ * lacks CAP_SYS_ADMIN, and otherwise whatever the per-type enable
+ * helper returns.
+ */
+static int bpf_enable_stats(union bpf_attr *attr)
+{
+       if (CHECK_ATTR(BPF_ENABLE_STATS))
+               return -EINVAL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* BPF_STATS_RUN_TIME is the only stats type handled here. */
+       if (attr->enable_stats.type == BPF_STATS_RUN_TIME)
+               return bpf_enable_runtime_stats();
+
+       return -EINVAL;
+}
+
  SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
  {
         union bpf_attr attr;
@@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
                 err = bpf_obj_get_next_id(&attr, uattr,
                                           &link_idr, &link_idr_lock);
                 break;
+       case BPF_ENABLE_STATS:
+               err = bpf_enable_stats(&attr);
+               break;
         default:
                 err = -EINVAL;
                 break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index e961286d0e145e54244defe8ad56edbf61ee4ed6..7adfe5dbce9d3fa395d8a7353e0b37dad7b52709 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000;
  
  #endif /* CONFIG_SYSCTL */
  
+/*
+ * bpf_stats_handler() is only referenced from the sysctl table, so it must
+ * be built only when both BPF_SYSCALL and SYSCTL are enabled; otherwise it
+ * is a defined-but-unused static function.
+ */
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
+/*
+ * proc handler for the sysctl entry whose ->data is a struct static_key.
+ *
+ * The value is parsed and reported through a temporary int table limited
+ * to the range 0..1. saved_val remembers the last value written through
+ * this handler, so the key is incremented or decremented only when that
+ * value actually changes.
+ *
+ * Writes require CAP_SYS_ADMIN (-EPERM otherwise). Returns the result of
+ * proc_dointvec_minmax().
+ */
+static int bpf_stats_handler(struct ctl_table *table, int write,
+                            void __user *buffer, size_t *lenp,
+                            loff_t *ppos)
+{
+       struct static_key *key = (struct static_key *)table->data;
+       static int saved_val;
+       int val, ret;
+       struct ctl_table tmp = {
+               .data   = &val,
+               .maxlen = sizeof(val),
+               .mode   = table->mode,
+               .extra1 = SYSCTL_ZERO,
+               .extra2 = SYSCTL_ONE,
+       };
+
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* Same mutex as the BPF_ENABLE_STATS fd path in kernel/bpf/syscall.c. */
+       mutex_lock(&bpf_stats_enabled_mutex);
+       /* Seed with the sysctl's own setting rather than the key count. */
+       val = saved_val;
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+       if (write && !ret && val != saved_val) {
+               if (val)
+                       static_key_slow_inc(key);
+               else
+                       static_key_slow_dec(key);
+               saved_val = val;
+       }
+       mutex_unlock(&bpf_stats_enabled_mutex);
+       return ret;
+}
+#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
+
  /*
   * /proc/sys support
   */
@@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = {
                 .data           = &bpf_stats_enabled_key.key,
                 .maxlen         = sizeof(bpf_stats_enabled_key),
                 .mode           = 0644,
-               .proc_handler   = proc_do_static_key,
+               .proc_handler   = bpf_stats_handler,
         },
  #endif
  #if defined(CONFIG_TREE_RCU)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 0eccafae55bbc1b589a22e6308cbc4557d04f74a..705e4822f997865b96556048f2325b26e149ae3d 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
         BPF_LINK_UPDATE,
         BPF_LINK_GET_FD_BY_ID,
         BPF_LINK_GET_NEXT_ID,
+       BPF_ENABLE_STATS,
  };
  
  enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
   */
  #define BPF_F_QUERY_EFFECTIVE  (1U << 0)
  
+/* stats type passed in bpf_attr.enable_stats.type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+       /* enable run_time_ns and run_cnt */
+       BPF_STATS_RUN_TIME = 0,
+};
+
  enum bpf_stack_build_id_status {
         /* user space need an empty entry to identify end of a trace */
         BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
                 __u32           old_prog_fd;
         } link_update;
  
+       struct { /* struct used by BPF_ENABLE_STATS command */
+               __u32           type;
+       } enable_stats;
+
  } __attribute__((aligned(8)));
  
  /* The description below is an attempt at providing documentation to eBPF
author	Song Liu <songliubraving@fb.com>
	Thu, 30 Apr 2020 07:15:04 +0000 (00:15 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
	Fri, 1 May 2020 17:36:32 +0000 (10:36 -0700)
include/linux/bpf.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/syscall.c		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history
tools/include/uapi/linux/bpf.h		patch \| blob \| history