pNFS: Add tracking to limit the number of pNFS retries
author Trond Myklebust <trondmy@gmail.com>
Sun, 7 Apr 2019 17:59:08 +0000 (13:59 -0400)
committer Anna Schumaker <Anna.Schumaker@Netapp.com>
Thu, 25 Apr 2019 18:18:14 +0000 (14:18 -0400)
When the client is reading or writing using pNFS, and hits an error
on the DS, then it typically sends a LAYOUTERROR and/or LAYOUTRETURN
to the MDS, before redirtying the failed pages, and going for a new
round of reads/writebacks. The problem is that if the server has no
way to fix the DS, this loop never terminates, so we need a way to
interrupt it after a set number of attempts have been made.
This patch adds an optional module parameter, io_maxretrans, that allows
the admin to specify how many times to retry the read/writeback process
before failing with a fatal error.
The default value of 0 preserves the existing behaviour of retrying forever.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
fs/nfs/direct.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/pagelist.c
fs/nfs/write.c
include/linux/nfs_page.h

index 2d301a1a73e23f7057b69aaa31f5ae3f80af62c7..2436bd92bc005a9799ea9e88814986e28dd6c0ec 100644 (file)
@@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
        }
 
        list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+               /* Bump the transmission count */
+               req->wb_nio++;
                if (!nfs_pageio_add_request(&desc, req)) {
                        nfs_list_move_request(req, &failed);
                        spin_lock(&cinfo.inode->i_lock);
@@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
                req = nfs_list_entry(data->pages.next);
                nfs_list_remove_request(req);
                if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+                       /*
+                        * Despite the reboot, the write was successful,
+                        * so reset wb_nio.
+                        */
+                       req->wb_nio = 0;
                        /* Note the rewrite will go through mds */
                        nfs_mark_request_commit(req, NULL, &cinfo, 0);
                } else
index 6673d4ff5a2a846c01e2de3e909e167da30156cb..9fdbcfd3e39d7ff36bea25b3ce58e74d5c7dd722 100644 (file)
@@ -28,6 +28,8 @@
 #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ)
 #define FF_LAYOUTRETURN_MAXERR 20
 
+static unsigned short io_maxretrans;
+
 static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
                struct nfs_pgio_header *hdr);
 static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
@@ -925,6 +927,7 @@ retry:
        pgm = &pgio->pg_mirrors[0];
        pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
 
+       pgio->pg_maxretrans = io_maxretrans;
        return;
 out_nolseg:
        if (pgio->pg_error < 0)
@@ -992,6 +995,7 @@ retry:
                pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
        }
 
+       pgio->pg_maxretrans = io_maxretrans;
        return;
 
 out_mds:
@@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");
 
 module_init(nfs4flexfilelayout_init);
 module_exit(nfs4flexfilelayout_exit);
+
+module_param(io_maxretrans, ushort, 0644);
+MODULE_PARM_DESC(io_maxretrans, "The  number of times the NFSv4.1 client "
+                       "retries an I/O request before returning an error. ");
index b8301c40dd78adc945cccd570f580ecf8e44dea9..4a31284f411e9e0599f734b2da879e657b10571a 100644 (file)
@@ -16,8 +16,8 @@
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
-#include <linux/nfs_page.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 #include <linux/nfs_mount.h>
 #include <linux/export.h>
 
@@ -327,6 +327,7 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
        req->wb_bytes   = count;
        req->wb_context = get_nfs_open_context(ctx);
        kref_init(&req->wb_kref);
+       req->wb_nio = 0;
        return req;
 }
 
@@ -370,6 +371,7 @@ nfs_create_subreq(struct nfs_page *req, struct nfs_page *last,
                nfs_lock_request(ret);
                ret->wb_index = req->wb_index;
                nfs_page_group_init(ret, last);
+               ret->wb_nio = req->wb_nio;
        }
        return ret;
 }
@@ -724,6 +726,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
        desc->pg_mirrors_dynamic = NULL;
        desc->pg_mirrors = desc->pg_mirrors_static;
        nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
+       desc->pg_maxretrans = 0;
 }
 
 /**
@@ -983,6 +986,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
                        return 0;
                mirror->pg_base = req->wb_pgbase;
        }
+
+       if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
+               if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
+                       desc->pg_error = -ETIMEDOUT;
+               else
+                       desc->pg_error = -EIO;
+               return 0;
+       }
+
        if (!nfs_can_coalesce_requests(prev, req, desc))
                return 0;
        nfs_list_move_request(req, &mirror->pg_list);
index b9bcbd06a628d85ed9f2518e77cd289f4f27ac8e..294604784f708db35cbd4c53bb1db053bd94c457 100644 (file)
@@ -1009,6 +1009,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
                        goto remove_req;
                }
                if (nfs_write_need_commit(hdr)) {
+                       /* Reset wb_nio, since the write was successful. */
+                       req->wb_nio = 0;
                        memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
                        nfs_mark_request_commit(req, hdr->lseg, &cinfo,
                                hdr->pgio_mirror_idx);
@@ -1142,6 +1144,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
                req->wb_bytes = end - req->wb_offset;
        else
                req->wb_bytes = rqend - req->wb_offset;
+       req->wb_nio = 0;
        return req;
 out_flushme:
        /*
@@ -1416,6 +1419,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
  */
 static void nfs_redirty_request(struct nfs_page *req)
 {
+       /* Bump the transmission count */
+       req->wb_nio++;
        nfs_mark_request_dirty(req);
        set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
        nfs_end_page_writeback(req);
index b7d0f15615c2a0536b0112b653a1bdc863cf24a9..8b36800d342d24f0fe526ea99773a4352fb3dcc2 100644 (file)
@@ -53,6 +53,7 @@ struct nfs_page {
        struct nfs_write_verifier       wb_verf;        /* Commit cookie */
        struct nfs_page         *wb_this_page;  /* list of reqs for this page */
        struct nfs_page         *wb_head;       /* head pointer for req list */
+       unsigned short          wb_nio;         /* Number of I/O attempts */
 };
 
 struct nfs_pageio_descriptor;
@@ -87,7 +88,6 @@ struct nfs_pgio_mirror {
 };
 
 struct nfs_pageio_descriptor {
-       unsigned char           pg_moreio : 1;
        struct inode            *pg_inode;
        const struct nfs_pageio_ops *pg_ops;
        const struct nfs_rw_ops *pg_rw_ops;
@@ -105,6 +105,8 @@ struct nfs_pageio_descriptor {
        struct nfs_pgio_mirror  pg_mirrors_static[1];
        struct nfs_pgio_mirror  *pg_mirrors_dynamic;
        u32                     pg_mirror_idx;  /* current mirror */
+       unsigned short          pg_maxretrans;
+       unsigned char           pg_moreio : 1;
 };
 
 /* arbitrarily selected limit to number of mirrors */