vfs: implement readahead(2) using POSIX_FADV_WILLNEED
authorAmir Goldstein <amir73il@gmail.com>
Wed, 29 Aug 2018 05:41:29 +0000 (08:41 +0300)
committerMiklos Szeredi <mszeredi@redhat.com>
Thu, 30 Aug 2018 18:01:32 +0000 (20:01 +0200)
The implementation of readahead(2) syscall is identical to that of
fadvise64(POSIX_FADV_WILLNEED) with a few exceptions:
1. readahead(2) returns -EINVAL for !mapping->a_ops and fadvise64()
   ignores the request and returns 0.
2. fadvise64() checks for integer overflow corner case
3. fadvise64() calls the optional filesystem fadvise() file operation

Unite the two implementations by calling vfs_fadvise() from readahead(2)
syscall. Check the !mapping->a_ops in readahead(2) syscall to preserve
documented syscall ABI behaviour.

Suggested-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: d1d04ef8572b ("ovl: stack file ops")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
mm/Makefile
mm/fadvise.c
mm/readahead.c

index 8716bdabe1e69f4758d1eedf9c6c8d9d98d2bec2..26ef77a3883b5c708659425229e975ac74069875 100644 (file)
@@ -32,7 +32,7 @@ ifdef CONFIG_CROSS_MEMORY_ATTACH
 mmu-$(CONFIG_MMU)      += process_vm_access.o
 endif
 
-obj-y                  := filemap.o mempool.o oom_kill.o \
+obj-y                  := filemap.o mempool.o oom_kill.o fadvise.o \
                           maccess.o page_alloc.o page-writeback.o \
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
@@ -49,7 +49,6 @@ else
        obj-y           += bootmem.o
 endif
 
-obj-$(CONFIG_ADVISE_SYSCALLS)  += fadvise.o
 ifdef CONFIG_MMU
        obj-$(CONFIG_ADVISE_SYSCALLS)   += madvise.o
 endif
index 2f59bac1cb7729d6bb80d4a28b034f4e4bc24c78..467bcd032037a905e991cefe9fe48922f2031036 100644 (file)
@@ -188,6 +188,8 @@ int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 }
 EXPORT_SYMBOL(vfs_fadvise);
 
+#ifdef CONFIG_ADVISE_SYSCALLS
+
 int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 {
        struct fd f = fdget(fd);
@@ -215,3 +217,4 @@ SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
 }
 
 #endif
+#endif
index a59ea70527b9bc46b4fde403a7f19b93493351fb..4e630143a0ba8549c9a63f106c7dd18b7aaa4d18 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/file.h>
 #include <linux/mm_inline.h>
 #include <linux/blk-cgroup.h>
+#include <linux/fadvise.h>
 
 #include "internal.h"
 
@@ -575,24 +576,6 @@ page_cache_async_readahead(struct address_space *mapping,
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
 
-static ssize_t
-do_readahead(struct address_space *mapping, struct file *filp,
-            pgoff_t index, unsigned long nr)
-{
-       if (!mapping || !mapping->a_ops)
-               return -EINVAL;
-
-       /*
-        * Readahead doesn't make sense for DAX inodes, but we don't want it
-        * to report a failure either.  Instead, we just return success and
-        * don't do any work.
-        */
-       if (dax_mapping(mapping))
-               return 0;
-
-       return force_page_cache_readahead(mapping, filp, index, nr);
-}
-
 ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
 {
        ssize_t ret;
@@ -600,16 +583,22 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
 
        ret = -EBADF;
        f = fdget(fd);
-       if (f.file) {
-               if (f.file->f_mode & FMODE_READ) {
-                       struct address_space *mapping = f.file->f_mapping;
-                       pgoff_t start = offset >> PAGE_SHIFT;
-                       pgoff_t end = (offset + count - 1) >> PAGE_SHIFT;
-                       unsigned long len = end - start + 1;
-                       ret = do_readahead(mapping, f.file, start, len);
-               }
-               fdput(f);
-       }
+       if (!f.file || !(f.file->f_mode & FMODE_READ))
+               goto out;
+
+       /*
+        * The readahead() syscall is intended to run only on files
+        * that can execute readahead. If readahead is not possible
+        * on this file, then we must return -EINVAL.
+        */
+       ret = -EINVAL;
+       if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
+           !S_ISREG(file_inode(f.file)->i_mode))
+               goto out;
+
+       ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
+out:
+       fdput(f);
        return ret;
 }