From 41e9a8046c92e26a68fdf5a4cb831b7c60113602 Mon Sep 17 00:00:00 2001 From: Teng Qin Date: Fri, 2 Jun 2017 21:03:53 -0700 Subject: [PATCH] samples/bpf: add tests for more perf event types $ trace_event tests attaching BPF program to HW_CPU_CYCLES, SW_CPU_CLOCK, HW_CACHE_L1D and other events. It runs 'dd' in the background while bpf program collects user and kernel stack trace on counter overflow. User space expects to see sys_read and sys_write in the kernel stack. $ tracex6 tests reading of various perf counters from BPF program. Both tests were refactored to increase coverage and be more accurate. Signed-off-by: Teng Qin Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/bpf_helpers.h | 3 +- samples/bpf/trace_event_user.c | 73 +++++++++++-- samples/bpf/tracex6_kern.c | 28 +++-- samples/bpf/tracex6_user.c | 180 ++++++++++++++++++++++++++------- 4 files changed, 228 insertions(+), 56 deletions(-) diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 9a9c95f2c9fb..51e567bc70fc 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -31,7 +31,8 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) = (void *) BPF_FUNC_get_current_uid_gid; static int (*bpf_get_current_comm)(void *buf, int buf_size) = (void *) BPF_FUNC_get_current_comm; -static int (*bpf_perf_event_read)(void *map, int index) = +static unsigned long long (*bpf_perf_event_read)(void *map, + unsigned long long flags) = (void *) BPF_FUNC_perf_event_read; static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = (void *) BPF_FUNC_clone_redirect; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index fa4336423da5..7bd827b84a67 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -75,7 +75,10 @@ static void print_stack(struct key_t *key, __u64 count) for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_addr(ip[i]); } - printf("\n"); + if (count < 6) + printf("\r"); + else + printf("\n"); if (key->kernstack == -EEXIST && !warned) { printf("stackmap collisions seen. Consider increasing size\n"); @@ -105,7 +108,7 @@ static void print_stacks(void) bpf_map_delete_elem(fd, &next_key); key = next_key; } - + printf("\n"); if (!sys_read_seen || !sys_write_seen) { printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); int_exit(0); @@ -122,24 +125,29 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) { int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); int *pmu_fd = malloc(nr_cpus * sizeof(int)); - int i; + int i, error = 0; /* open perf_event on all cpus */ for (i = 0; i < nr_cpus; i++) { pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); if (pmu_fd[i] < 0) { printf("sys_perf_event_open failed\n"); + error = 1; goto all_cpu_err; } assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); - assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0); } - system("dd if=/dev/zero of=/dev/null count=5000k"); + system("dd if=/dev/zero of=/dev/null count=5000k status=none"); print_stacks(); all_cpu_err: - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); close(pmu_fd[i]); + } free(pmu_fd); + if (error) + int_exit(0); } static void test_perf_event_task(struct perf_event_attr *attr) @@ -150,12 +158,13 @@ static void test_perf_event_task(struct perf_event_attr *attr) pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); if (pmu_fd < 0) { printf("sys_perf_event_open failed\n"); - return; + int_exit(0); } assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); - assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); - system("dd if=/dev/zero of=/dev/null count=5000k"); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0); + system("dd if=/dev/zero of=/dev/null count=5000k status=none"); print_stacks(); + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); close(pmu_fd); } @@ -175,11 +184,56 @@ static void test_bpf_perf_event(void) .config = PERF_COUNT_SW_CPU_CLOCK, .inherit = 1, }; + struct perf_event_attr attr_hw_cache_l1d = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + .inherit = 1, + }; + struct perf_event_attr attr_hw_cache_branch_miss = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_BPU | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .inherit = 1, + }; + struct perf_event_attr attr_type_raw = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_RAW, + /* Intel Instruction Retired */ + .config = 0xc0, + .inherit = 1, + }; + printf("Test HW_CPU_CYCLES\n"); test_perf_event_all_cpu(&attr_type_hw); test_perf_event_task(&attr_type_hw); + + printf("Test SW_CPU_CLOCK\n"); test_perf_event_all_cpu(&attr_type_sw); test_perf_event_task(&attr_type_sw); + + printf("Test HW_CACHE_L1D\n"); + test_perf_event_all_cpu(&attr_hw_cache_l1d); + test_perf_event_task(&attr_hw_cache_l1d); + + printf("Test HW_CACHE_BPU\n"); + test_perf_event_all_cpu(&attr_hw_cache_branch_miss); + test_perf_event_task(&attr_hw_cache_branch_miss); + + printf("Test Instruction Retired\n"); + test_perf_event_all_cpu(&attr_type_raw); + test_perf_event_task(&attr_type_raw); + + printf("*** PASS ***\n"); } @@ -209,7 +263,6 @@ int main(int argc, char **argv) return 0; } test_bpf_perf_event(); - int_exit(0); return 0; } diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c index be479c4af9e2..e7d180305974 100644 --- a/samples/bpf/tracex6_kern.c +++ b/samples/bpf/tracex6_kern.c @@ -3,22 +3,36 @@ #include #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") my_map = { +struct bpf_map_def SEC("maps") counters = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), - .max_entries = 32, + .max_entries = 64, +}; +struct bpf_map_def SEC("maps") values = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(u64), + .max_entries = 64, }; -SEC("kprobe/sys_write") +SEC("kprobe/htab_map_get_next_key") int bpf_prog1(struct pt_regs *ctx) { - u64 count; u32 key = bpf_get_smp_processor_id(); - char fmt[] = "CPU-%d %llu\n"; + u64 count, *val; + s64 error; + + count = bpf_perf_event_read(&counters, key); + error = (s64)count; + if (error <= -2 && error >= -22) + return 0; - count = bpf_perf_event_read(&my_map, key); - bpf_trace_printk(fmt, sizeof(fmt), key, count); + val = bpf_map_lookup_elem(&values, &key); + if (val) + *val = count; + else + bpf_map_update_elem(&values, &key, &count, BPF_NOEXIST); return 0; } diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index ca7874ed77f4..a05a99a0752f 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -1,73 +1,177 @@ -#include -#include -#include -#include -#include +#define _GNU_SOURCE + +#include #include -#include -#include #include #include -#include "libbpf.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "bpf_load.h" +#include "libbpf.h" #include "perf-sys.h" #define SAMPLE_PERIOD 0x7fffffffffffffffULL -static void test_bpf_perf_event(void) +static void check_on_cpu(int cpu, struct perf_event_attr *attr) { - int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - int *pmu_fd = malloc(nr_cpus * sizeof(int)); - int status, i; + int pmu_fd, error = 0; + cpu_set_t set; + __u64 value; - struct perf_event_attr attr_insn_pmu = { + /* Move to target CPU */ + CPU_ZERO(&set); + CPU_SET(cpu, &set); + assert(sched_setaffinity(0, sizeof(set), &set) == 0); + /* Open perf event and attach to the perf_event_array */ + pmu_fd = sys_perf_event_open(attr, -1/*pid*/, cpu/*cpu*/, -1/*group_fd*/, 0); + if (pmu_fd < 0) { + fprintf(stderr, "sys_perf_event_open failed on CPU %d\n", cpu); + error = 1; + goto on_exit; + } + assert(bpf_map_update_elem(map_fd[0], &cpu, &pmu_fd, BPF_ANY) == 0); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); + /* Trigger the kprobe */ + bpf_map_get_next_key(map_fd[1], &cpu, NULL); + /* Check the value */ + if (bpf_map_lookup_elem(map_fd[1], &cpu, &value)) { + fprintf(stderr, "Value missing for CPU %d\n", cpu); + error = 1; + goto on_exit; + } + fprintf(stderr, "CPU %d: %llu\n", cpu, value); + +on_exit: + assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE, 0) == 0 || error); + assert(close(pmu_fd) == 0 || error); + assert(bpf_map_delete_elem(map_fd[1], &cpu) == 0 || error); + exit(error); +} + +static void test_perf_event_array(struct perf_event_attr *attr, + const char *name) +{ + int i, status, nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + pid_t pid[nr_cpus]; + int err = 0; + + printf("Test reading %s counters\n", name); + + for (i = 0; i < nr_cpus; i++) { + pid[i] = fork(); + assert(pid[i] >= 0); + if (pid[i] == 0) { + check_on_cpu(i, attr); + exit(1); + } + } + + for (i = 0; i < nr_cpus; i++) { + assert(waitpid(pid[i], &status, 0) == pid[i]); + err |= status; + } + + if (err) + printf("Test: %s FAILED\n", name); +} + +static void test_bpf_perf_event(void) +{ + struct perf_event_attr attr_cycles = { .freq = 0, .sample_period = SAMPLE_PERIOD, .inherit = 0, .type = PERF_TYPE_HARDWARE, .read_format = 0, .sample_type = 0, - .config = 0,/* PMU: cycles */ + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + struct perf_event_attr attr_clock = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_SOFTWARE, + .read_format = 0, + .sample_type = 0, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + struct perf_event_attr attr_raw = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_RAW, + .read_format = 0, + .sample_type = 0, + /* Intel Instruction Retired */ + .config = 0xc0, + }; + struct perf_event_attr attr_l1d_load = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_HW_CACHE, + .read_format = 0, + .sample_type = 0, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + }; + struct perf_event_attr attr_llc_miss = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_HW_CACHE, + .read_format = 0, + .sample_type = 0, + .config = + PERF_COUNT_HW_CACHE_LL | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + }; + struct perf_event_attr attr_msr_tsc = { + .freq = 0, + .sample_period = 0, + .inherit = 0, + /* From /sys/bus/event_source/devices/msr/ */ + .type = 7, + .read_format = 0, + .sample_type = 0, + .config = 0, }; - for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); - if (pmu_fd[i] < 0) { - printf("event syscall failed\n"); - goto exit; - } - - bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); - ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); - } + test_perf_event_array(&attr_cycles, "HARDWARE-cycles"); + test_perf_event_array(&attr_clock, "SOFTWARE-clock"); + test_perf_event_array(&attr_raw, "RAW-instruction-retired"); + test_perf_event_array(&attr_l1d_load, "HW_CACHE-L1D-load"); - status = system("ls > /dev/null"); - if (status) - goto exit; - status = system("sleep 2"); - if (status) - goto exit; - -exit: - for (i = 0; i < nr_cpus; i++) - close(pmu_fd[i]); - close(map_fd[0]); - free(pmu_fd); + /* below tests may fail in qemu */ + test_perf_event_array(&attr_llc_miss, "HW_CACHE-LLC-miss"); + test_perf_event_array(&attr_msr_tsc, "Dynamic-msr-tsc"); } int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); return 1; } test_bpf_perf_event(); - read_trace_pipe(); - return 0; } -- 2.30.2