perf_counter: add an mmap method to allow userspace to read hardware counters

author Paul Mackerras <paulus@samba.org>

Mon, 23 Mar 2009 17:22:08 +0000 (18:22 +0100)

committer Ingo Molnar <mingo@elte.hu>

Mon, 6 Apr 2009 07:30:26 +0000 (09:30 +0200)
author Paul Mackerras <paulus@samba.org>
Mon, 23 Mar 2009 17:22:08 +0000 (18:22 +0100)
committer Ingo Molnar <mingo@elte.hu>
Mon, 6 Apr 2009 07:30:26 +0000 (09:30 +0200)
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c

index d05651584d439cbb581fb9d2504c978b02ae38a5..e4349281b07def8cc3d09e088dda7e0d6d120edf 100644 (file)
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -417,6 +417,8 @@ void hw_perf_restore(u64 disable)
                 atomic64_set(&counter->hw.prev_count, val);
                 counter->hw.idx = hwc_index[i] + 1;
                 write_pmc(counter->hw.idx, val);
+               if (counter->user_page)
+                       perf_counter_update_userpage(counter);
         }
         mb();
         cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
@@ -572,6 +574,8 @@ static void power_perf_disable(struct perf_counter *counter)
                         ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
                         write_pmc(counter->hw.idx, 0);
                         counter->hw.idx = 0;
+                       if (counter->user_page)
+                               perf_counter_update_userpage(counter);
                         break;
                 }
         }
@@ -698,6 +702,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
         write_pmc(counter->hw.idx, val);
         atomic64_set(&counter->hw.prev_count, val);
         atomic64_set(&counter->hw.period_left, left);
+       if (counter->user_page)
+               perf_counter_update_userpage(counter);
  
         /*
          * Finally record data if requested.
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h

index 18dc17d0a61cc050416f68d4ce8b1e9302322637..40b324e91bf68f165e8f8386fe70494b4a32e4c7 100644 (file)
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -143,6 +143,17 @@ struct perf_counter_hw_event {
  #define PERF_COUNTER_IOC_ENABLE                _IO('$', 0)
  #define PERF_COUNTER_IOC_DISABLE       _IO('$', 1)
  
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+       __u32   version;                /* version number of this structure */
+       __u32   compat_version;         /* lowest version this is compat with */
+       __u32   lock;                   /* seqlock: bumped before and after each kernel update */
+       __u32   index;                  /* hardware counter identifier (counter->hw.idx) */
+       __s64   offset;                 /* add to hardware counter value */
+};
+
  #ifdef __KERNEL__
  /*
   * Kernel-internal data types and definitions:
@@ -278,6 +289,9 @@ struct perf_counter {
         int                             oncpu;
         int                             cpu;
  
+       /* pointer to page shared with userspace via mmap */
+       unsigned long                   user_page;
+
         /* read() / irq related data */
         wait_queue_head_t               waitq;
         /* optional: for NMIs */
@@ -361,6 +375,7 @@ extern int perf_counter_task_enable(void);
  extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
                struct perf_cpu_context *cpuctx,
                struct perf_counter_context *ctx, int cpu);
+extern void perf_counter_update_userpage(struct perf_counter *counter);
  
  extern void perf_counter_output(struct perf_counter *counter,
                                 int nmi, struct pt_regs *regs);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c

index ce34bff07bdaaf16fe8b79b5e0427fd0de13ee73..d9cfd902140e00011c65775dc7e262ec0576d426 100644 (file)
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file)
         mutex_unlock(&counter->mutex);
         mutex_unlock(&ctx->mutex);
  
+       free_page(counter->user_page);
         free_counter(counter);
         put_context(ctx);
  
@@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
         return err;
  }
  
+void perf_counter_update_userpage(struct perf_counter *counter)
+{       /* refresh the page shared with userspace from the counter's current state */
+       struct perf_counter_mmap_page *userpg;
+
+       if (!counter->user_page)        /* no page allocated: nothing to update */
+               return;
+       userpg = (struct perf_counter_mmap_page *) counter->user_page;
+
+       ++userpg->lock;                 /* first bump: the fields below are about to change */
+       smp_wmb();                      /* order the lock bump before the field writes */
+       userpg->index = counter->hw.idx;
+       userpg->offset = atomic64_read(&counter->count);
+       if (counter->state == PERF_COUNTER_STATE_ACTIVE)        /* prev_count is only subtracted while active */
+               userpg->offset -= atomic64_read(&counter->hw.prev_count);
+       smp_wmb();                      /* order the field writes before the closing bump */
+       ++userpg->lock;                 /* second bump: update is complete */
+}
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{       /* .fault handler of perf_mmap_vmops: hands back the counter's user page */
+       struct perf_counter *counter = vma->vm_file->private_data;
+
+       if (!counter->user_page)        /* no page to hand out */
+               return VM_FAULT_SIGBUS;
+
+       vmf->page = virt_to_page(counter->user_page);
+       get_page(vmf->page);            /* take a reference on the page being returned */
+       return 0;
+}
+
+static struct vm_operations_struct perf_mmap_vmops = {  /* assigned to vma->vm_ops by perf_mmap() */
+       .fault = perf_mmap_fault,
+};
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{       /* .mmap handler: returns 0, -EINVAL for an unsupported request, -ENOMEM if no page */
+       struct perf_counter *counter = file->private_data;
+       unsigned long userpg;
+
+       if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) /* must be shared and not writable */
+               return -EINVAL;
+       if (vma->vm_end - vma->vm_start != PAGE_SIZE)   /* mapping must be exactly one page */
+               return -EINVAL;
+
+       /*
+        * For now, restrict to the case of a hardware counter
+        * on the current task.
+        */
+       if (is_software_counter(counter) || counter->task != current)
+               return -EINVAL;
+
+       userpg = counter->user_page;
+       if (!userpg) {
+               userpg = get_zeroed_page(GFP_KERNEL);   /* allocated before taking the mutex; result checked below */
+               mutex_lock(&counter->mutex);
+               if (counter->user_page) {       /* a page was installed in the meantime: drop ours, use that one */
+                       free_page(userpg);
+                       userpg = counter->user_page;
+               } else {
+                       counter->user_page = userpg;
+               }
+               mutex_unlock(&counter->mutex);
+               if (!userpg)                    /* allocation failed and no existing page to fall back on */
+                       return -ENOMEM;
+       }
+
+       perf_counter_update_userpage(counter);  /* fill in the page contents at mmap time */
+
+       vma->vm_flags &= ~VM_MAYWRITE;
+       vma->vm_flags |= VM_RESERVED;
+       vma->vm_ops = &perf_mmap_vmops;         /* pages are supplied by perf_mmap_fault() */
+       return 0;
+}
+
  static const struct file_operations perf_fops = {
         .release                = perf_release,
         .read                   = perf_read,
         .poll                   = perf_poll,
         .unlocked_ioctl         = perf_ioctl,
         .compat_ioctl           = perf_ioctl,
+       .mmap                   = perf_mmap,    /* maps the struct perf_counter_mmap_page page */
  };
  
  /*
author	Paul Mackerras <paulus@samba.org>
	Mon, 23 Mar 2009 17:22:08 +0000 (18:22 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Mon, 6 Apr 2009 07:30:26 +0000 (09:30 +0200)
arch/powerpc/kernel/perf_counter.c		patch \| blob \| history
include/linux/perf_counter.h		patch \| blob \| history
kernel/perf_counter.c		patch \| blob \| history