libbpf: Don't attach perf_buffer to offline/missing CPUs
authorAndrii Nakryiko <andriin@fb.com>
Thu, 12 Dec 2019 01:36:09 +0000 (17:36 -0800)
committerAlexei Starovoitov <ast@kernel.org>
Fri, 13 Dec 2019 21:00:09 +0000 (13:00 -0800)
It's quite common on some systems to have more CPUs enlisted as "possible",
than there are (and could ever be) present/online CPUs. In such cases,
perf_buffer creationg will fail due to inability to create perf event on
missing CPU with error like this:

libbpf: failed to open perf buffer event on cpu #16: No such device

This patch fixes the logic of perf_buffer__new() to ignore CPUs that are
missing or currently offline. In rare cases where user explicitly listed
specific CPUs to connect to, behavior is unchanged: libbpf will try to open
perf event buffer on specified CPU(s) anyways.

Fixes: fb84b8224655 ("libbpf: add perf buffer API")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20191212013609.1691168-1-andriin@fb.com
tools/lib/bpf/libbpf.c

index 98455e8aa6dbbd5d7e5457c421fa32fb88fe152b..27d5f7ecba32ce5774058319da66336bc29d73f7 100644 (file)
@@ -5946,7 +5946,7 @@ struct perf_buffer {
        size_t mmap_size;
        struct perf_cpu_buf **cpu_bufs;
        struct epoll_event *events;
-       int cpu_cnt;
+       int cpu_cnt; /* number of allocated CPU buffers */
        int epoll_fd; /* perf event FD */
        int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
 };
@@ -6080,11 +6080,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
                                              struct perf_buffer_params *p)
 {
+       const char *online_cpus_file = "/sys/devices/system/cpu/online";
        struct bpf_map_info map = {};
        char msg[STRERR_BUFSIZE];
        struct perf_buffer *pb;
+       bool *online = NULL;
        __u32 map_info_len;
-       int err, i;
+       int err, i, j, n;
 
        if (page_cnt & (page_cnt - 1)) {
                pr_warn("page count should be power of two, but is %zu\n",
@@ -6153,20 +6155,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
                goto error;
        }
 
-       for (i = 0; i < pb->cpu_cnt; i++) {
+       err = parse_cpu_mask_file(online_cpus_file, &online, &n);
+       if (err) {
+               pr_warn("failed to get online CPU mask: %d\n", err);
+               goto error;
+       }
+
+       for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
                struct perf_cpu_buf *cpu_buf;
                int cpu, map_key;
 
                cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
                map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
 
+               /* in case user didn't explicitly requested particular CPUs to
+                * be attached to, skip offline/not present CPUs
+                */
+               if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
+                       continue;
+
                cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
                if (IS_ERR(cpu_buf)) {
                        err = PTR_ERR(cpu_buf);
                        goto error;
                }
 
-               pb->cpu_bufs[i] = cpu_buf;
+               pb->cpu_bufs[j] = cpu_buf;
 
                err = bpf_map_update_elem(pb->map_fd, &map_key,
                                          &cpu_buf->fd, 0);
@@ -6178,21 +6192,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
                        goto error;
                }
 
-               pb->events[i].events = EPOLLIN;
-               pb->events[i].data.ptr = cpu_buf;
+               pb->events[j].events = EPOLLIN;
+               pb->events[j].data.ptr = cpu_buf;
                if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
-                             &pb->events[i]) < 0) {
+                             &pb->events[j]) < 0) {
                        err = -errno;
                        pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
                                cpu, cpu_buf->fd,
                                libbpf_strerror_r(err, msg, sizeof(msg)));
                        goto error;
                }
+               j++;
        }
+       pb->cpu_cnt = j;
+       free(online);
 
        return pb;
 
 error:
+       free(online);
        if (pb)
                perf_buffer__free(pb);
        return ERR_PTR(err);