userfaultfd: non-cooperative: add madvise() event for MADV_DONTNEED request
author: Pavel Emelyanov <xemul@parallels.com>
Wed, 22 Feb 2017 23:42:40 +0000 (15:42 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Feb 2017 00:41:28 +0000 (16:41 -0800)
If a page is punched out of the address space by madvise(MADV_DONTNEED),
the uffd reader should be told about it, so that it can zeromap the
respective area when a later page-fault (#PF) event arrives for it.

Link: http://lkml.kernel.org/r/20161216144821.5183-14-aarcange@redhat.com
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/userfaultfd.c
include/linux/userfaultfd_k.h
include/uapi/linux/userfaultfd.h
mm/madvise.c

index 5d37c37854b094c08acdf5215b804570b15dad43..ea9008254df46f3acb935876468b0c0e9c4e8df5 100644 (file)
@@ -633,6 +633,34 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
        userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
+void madvise_userfault_dontneed(struct vm_area_struct *vma,
+                               struct vm_area_struct **prev,
+                               unsigned long start, unsigned long end)
+{ /* Post a UFFD_EVENT_MADVDONTNEED message carrying start/end to the uffd context attached to vma. */
+       struct mm_struct *mm = vma->vm_mm;
+       struct userfaultfd_ctx *ctx;
+       struct userfaultfd_wait_queue ewq;
+
+       ctx = vma->vm_userfaultfd_ctx.ctx;
+       if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_MADVDONTNEED))
+               return; /* no uffd context on this vma, or the MADVDONTNEED event feature is not enabled */
+
+       userfaultfd_ctx_get(ctx); /* NOTE(review): presumably pins ctx while mmap_sem is dropped; confirm where the matching put happens */
+       up_read(&mm->mmap_sem); /* mm is cached above, so the final down_read() does not need to touch vma again */
+
+       *prev = NULL; /* We wait for ACK w/o the mmap semaphore; presumably the caller's prev may be stale by then, so reset it */
+
+       msg_init(&ewq.msg);
+
+       ewq.msg.event = UFFD_EVENT_MADVDONTNEED;
+       ewq.msg.arg.madv_dn.start = start;
+       ewq.msg.arg.madv_dn.end = end;
+
+       userfaultfd_event_wait_completion(ctx, &ewq); /* return value, if any, is not checked here */
+
+       down_read(&mm->mmap_sem); /* re-take the read lock released above so the caller's locking state is restored */
+}
+
 static int userfaultfd_release(struct inode *inode, struct file *file)
 {
        struct userfaultfd_ctx *ctx = file->private_data;
index 78ec197e8b47ec4611be205259958cc46c4a6b04..f431861f22f1d8fa9a8e5f1353e4774147ceaa2a 100644 (file)
@@ -61,6 +61,11 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
                                        unsigned long from, unsigned long to,
                                        unsigned long len);
 
+extern void madvise_userfault_dontneed(struct vm_area_struct *vma,
+                                      struct vm_area_struct **prev,
+                                      unsigned long start,
+                                      unsigned long end);
+
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -106,6 +111,13 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
                                               unsigned long len)
 {
 }
+
+static inline void madvise_userfault_dontneed(struct vm_area_struct *vma,
+                                             struct vm_area_struct **prev,
+                                             unsigned long start,
+                                             unsigned long end)
+{ /* Empty stub used when CONFIG_USERFAULTFD is not set: no event is sent and *prev is left untouched. */
+}
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
index 79a85e5bd3880d47906dffb476e9a3dbf3019936..2bbf32319cf5e0718f01bcab4ba2224352ce7503 100644 (file)
@@ -19,7 +19,8 @@
  */
 #define UFFD_API ((__u64)0xAA)
 #define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK |       \
-                          UFFD_FEATURE_EVENT_REMAP)
+                          UFFD_FEATURE_EVENT_REMAP |       \
+                          UFFD_FEATURE_EVENT_MADVDONTNEED)
 #define UFFD_API_IOCTLS                                \
        ((__u64)1 << _UFFDIO_REGISTER |         \
         (__u64)1 << _UFFDIO_UNREGISTER |       \
@@ -83,6 +84,11 @@ struct uffd_msg {
                        __u64   len;
                } remap;
 
+               struct {
+                       __u64   start;
+                       __u64   end;
+               } madv_dn;
+
                struct {
                        /* unused reserved fields */
                        __u64   reserved1;
@@ -98,6 +104,7 @@ struct uffd_msg {
 #define UFFD_EVENT_PAGEFAULT   0x12
 #define UFFD_EVENT_FORK                0x13
 #define UFFD_EVENT_REMAP       0x14
+#define UFFD_EVENT_MADVDONTNEED        0x15
 
 /* flags for UFFD_EVENT_PAGEFAULT */
 #define UFFD_PAGEFAULT_FLAG_WRITE      (1<<0)  /* If this was a write fault */
@@ -119,6 +126,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP         (1<<0)
 #define UFFD_FEATURE_EVENT_FORK                        (1<<1)
 #define UFFD_FEATURE_EVENT_REMAP               (1<<2)
+#define UFFD_FEATURE_EVENT_MADVDONTNEED                (1<<3)
        __u64 features;
 
        __u64 ioctls;
index 0e3828eae9f875a0df39421ed4043cdc1f41404d..06ffb5a170de427fae250136a04e4794257311c2 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/syscalls.h>
 #include <linux/mempolicy.h>
 #include <linux/page-isolation.h>
+#include <linux/userfaultfd_k.h>
 #include <linux/hugetlb.h>
 #include <linux/falloc.h>
 #include <linux/sched.h>
@@ -477,6 +478,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
                return -EINVAL;
 
        zap_page_range(vma, start, end - start, NULL);
+       madvise_userfault_dontneed(vma, prev, start, end);
        return 0;
 }