NFS: Fix unstable write completion
authorTrond Myklebust <trond.myklebust@primarydata.com>
Wed, 7 Mar 2018 20:22:31 +0000 (15:22 -0500)
committerTrond Myklebust <trond.myklebust@primarydata.com>
Thu, 8 Mar 2018 17:56:32 +0000 (12:56 -0500)
We do want to respect the FLUSH_SYNC argument to nfs_commit_inode() to
ensure that all outstanding COMMIT requests to the inode in question are
complete. Currently we may exit early from both nfs_commit_inode() and
nfs_write_inode() even if there are COMMIT requests in flight, or unstable
writes on the commit list.

In order to get the right semantics w.r.t. sync_inode(), we don't need
to have nfs_commit_inode() reset the inode dirty flags when called from
nfs_wb_page() and/or nfs_wb_all(). We just need to ensure that
nfs_write_inode() leaves them in the right state if there are outstanding
commits, or stable pages.

Reported-by: Scott Mayhew <smayhew@redhat.com>
Fixes: dc4fd9ab01ab ("nfs: don't wait on commit in nfs_commit_inode()...")
Cc: stable@vger.kernel.org # v4.14+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
fs/nfs/write.c

index 7428a669d7a77b5fd82a7b8da348308ac058d155..e7d8ceae8f26b4ad5f6b9c68df14f13d858ab308 100644 (file)
@@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
        return status;
 }
 
-int nfs_commit_inode(struct inode *inode, int how)
+static int __nfs_commit_inode(struct inode *inode, int how,
+               struct writeback_control *wbc)
 {
        LIST_HEAD(head);
        struct nfs_commit_info cinfo;
        int may_wait = how & FLUSH_SYNC;
-       int error = 0;
-       int res;
+       int ret, nscan;
 
        nfs_init_cinfo_from_inode(&cinfo, inode);
        nfs_commit_begin(cinfo.mds);
-       res = nfs_scan_commit(inode, &head, &cinfo);
-       if (res)
-               error = nfs_generic_commit_list(inode, &head, how, &cinfo);
+       for (;;) {
+               ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
+               if (ret <= 0)
+                       break;
+               ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
+               if (ret < 0)
+                       break;
+               ret = 0;
+               if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
+                       if (nscan < wbc->nr_to_write)
+                               wbc->nr_to_write -= nscan;
+                       else
+                               wbc->nr_to_write = 0;
+               }
+               if (nscan < INT_MAX)
+                       break;
+               cond_resched();
+       }
        nfs_commit_end(cinfo.mds);
-       if (res == 0)
-               return res;
-       if (error < 0)
-               goto out_error;
-       if (!may_wait)
-               goto out_mark_dirty;
-       error = wait_on_commit(cinfo.mds);
-       if (error < 0)
-               return error;
-       return res;
-out_error:
-       res = error;
-       /* Note: If we exit without ensuring that the commit is complete,
-        * we must mark the inode as dirty. Otherwise, future calls to
-        * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
-        * that the data is on the disk.
-        */
-out_mark_dirty:
-       __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-       return res;
+       if (ret || !may_wait)
+               return ret;
+       return wait_on_commit(cinfo.mds);
+}
+
+int nfs_commit_inode(struct inode *inode, int how)
+{
+       return __nfs_commit_inode(inode, how, NULL);
 }
 EXPORT_SYMBOL_GPL(nfs_commit_inode);
 
@@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        int flags = FLUSH_SYNC;
        int ret = 0;
 
-       /* no commits means nothing needs to be done */
-       if (!atomic_long_read(&nfsi->commit_info.ncommit))
-               return ret;
-
        if (wbc->sync_mode == WB_SYNC_NONE) {
+               /* no commits means nothing needs to be done */
+               if (!atomic_long_read(&nfsi->commit_info.ncommit))
+                       goto check_requests_outstanding;
+
                /* Don't commit yet if this is a non-blocking flush and there
                 * are a lot of outstanding writes for this mapping.
                 */
@@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
                flags = 0;
        }
 
-       ret = nfs_commit_inode(inode, flags);
-       if (ret >= 0) {
-               if (wbc->sync_mode == WB_SYNC_NONE) {
-                       if (ret < wbc->nr_to_write)
-                               wbc->nr_to_write -= ret;
-                       else
-                               wbc->nr_to_write = 0;
-               }
-               return 0;
-       }
+       ret = __nfs_commit_inode(inode, flags, wbc);
+       if (!ret) {
+               if (flags & FLUSH_SYNC)
+                       return 0;
+       } else if (atomic_long_read(&nfsi->commit_info.ncommit))
+               goto out_mark_dirty;
+
+check_requests_outstanding:
+       if (!atomic_read(&nfsi->commit_info.rpcs_out))
+               return ret;
 out_mark_dirty:
        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
        return ret;