xprtrdma: Honor ->send_request API contract
author Chuck Lever <chuck.lever@oracle.com>
Wed, 29 Jun 2016 17:53:43 +0000 (13:53 -0400)
committer Anna Schumaker <Anna.Schumaker@Netapp.com>
Mon, 11 Jul 2016 19:50:43 +0000 (15:50 -0400)
Commit c93c62231cf5 ("xprtrdma: Disconnect on registration failure")
added a disconnect for some RPC marshaling failures. This is needed
only in a handful of cases, but it was triggering for simple stuff
like temporary resource shortages. Try to straighten this out.

Fix up the lower layers so they don't return -ENOMEM or other error
codes that the RPC client's FSM doesn't explicitly recognize.

Also fix up the places in the send_request path that do want a
disconnect. For example, when ib_post_send or ib_post_recv fail,
this is a sign that there is a send or receive queue resource
miscalculation. That should be rare, and is a sign of a software
bug. But xprtrdma can recover: disconnect to reset the transport and
start over.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c

index 8b6ce8ebe60f91cafb72ad949e77ebb7e406ee2e..aae4c372a40fa13e77662f2d80c2c1a4cb32acf9 100644 (file)
@@ -219,7 +219,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
                rpcrdma_defer_mr_recovery(mw);
        mw = rpcrdma_get_mw(r_xprt);
        if (!mw)
-               return -ENOMEM;
+               return -ENOBUFS;
 
        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
@@ -269,14 +269,14 @@ out_dmamap_err:
        pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
               mw->mw_sg, mw->mw_nents);
        rpcrdma_defer_mr_recovery(mw);
-       return -ENOMEM;
+       return -EIO;
 
 out_maperr:
        pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
               len, (unsigned long long)dma_pages[0],
               pageoff, mw->mw_nents, rc);
        rpcrdma_defer_mr_recovery(mw);
-       return rc;
+       return -EIO;
 }
 
 /* Invalidate all memory regions that were registered for "req".
index fc2826b3518c74abad5ab50c86008a49d9b2727e..d7613db9185d0c43ebe0d130974c02b9b98c543d 100644 (file)
@@ -382,7 +382,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
                        rpcrdma_defer_mr_recovery(mw);
                mw = rpcrdma_get_mw(r_xprt);
                if (!mw)
-                       return -ENOMEM;
+                       return -ENOBUFS;
        } while (mw->frmr.fr_state != FRMR_IS_INVALID);
        frmr = &mw->frmr;
        frmr->fr_state = FRMR_IS_VALID;
@@ -456,18 +456,18 @@ out_dmamap_err:
        pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
               mw->mw_sg, mw->mw_nents);
        rpcrdma_defer_mr_recovery(mw);
-       return -ENOMEM;
+       return -EIO;
 
 out_mapmr_err:
        pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
               frmr->fr_mr, n, mw->mw_nents);
-       rc = n < 0 ? n : -EIO;
        rpcrdma_defer_mr_recovery(mw);
-       return rc;
+       return -EIO;
 
 out_senderr:
+       pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
        rpcrdma_defer_mr_recovery(mw);
-       return rc;
+       return -ENOTCONN;
 }
 
 static struct ib_send_wr *
@@ -569,7 +569,8 @@ unmap:
        return;
 
 reset_mrs:
-       pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+       pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+       rdma_disconnect(ia->ri_id);
 
        /* Find and reset the MRs in the LOCAL_INV WRs that did not
         * get posted. This is synchronous, and slow.
index 35a81096e83d50bd501726ed1d9376a5e4bcf54d..77e002f4d005512b0f6eb366433b1e8ac52e10e7 100644 (file)
@@ -251,7 +251,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
                        /* alloc the pagelist for receiving buffer */
                        ppages[p] = alloc_page(GFP_ATOMIC);
                        if (!ppages[p])
-                               return -ENOMEM;
+                               return -EAGAIN;
                }
                seg[n].mr_page = ppages[p];
                seg[n].mr_offset = (void *)(unsigned long) page_base;
index 4c8e7f11b906fd74c0cc1fc2133f7efe76671b4e..be4dd2c7c680325459c0f6694054bebc77fc22da 100644 (file)
@@ -558,7 +558,6 @@ out_sendbuf:
 
 out_fail:
        rpcrdma_buffer_put(req);
-       r_xprt->rx_stats.failed_marshal_count++;
        return NULL;
 }
 
@@ -590,8 +589,19 @@ xprt_rdma_free(void *buffer)
        rpcrdma_buffer_put(req);
 }
 
-/*
+/**
+ * xprt_rdma_send_request - marshal and send an RPC request
+ * @task: RPC task with an RPC message in rq_snd_buf
+ *
+ * Return values:
+ *        0:   The request has been sent
+ * ENOTCONN:   Caller needs to invoke connect logic then call again
+ *  ENOBUFS:   Call again later to send the request
+ *      EIO:   A permanent error occurred. The request was not sent,
+ *             and don't try it again
+ *
  * send_request invokes the meat of RPC RDMA. It must do the following:
+ *
  *  1.  Marshal the RPC request into an RPC RDMA request, which means
  *     putting a header in front of data, and creating IOVs for RDMA
  *     from those in the request.
@@ -600,7 +610,6 @@ xprt_rdma_free(void *buffer)
  *     the request (rpcrdma_ep_post).
  *  4.  No partial sends are possible in the RPC-RDMA protocol (as in UDP).
  */
-
 static int
 xprt_rdma_send_request(struct rpc_task *task)
 {
@@ -630,11 +639,12 @@ xprt_rdma_send_request(struct rpc_task *task)
        return 0;
 
 failed_marshal:
-       r_xprt->rx_stats.failed_marshal_count++;
        dprintk("RPC:       %s: rpcrdma_marshal_req failed, status %i\n",
                __func__, rc);
        if (rc == -EIO)
-               return -EIO;
+               r_xprt->rx_stats.failed_marshal_count++;
+       if (rc != -ENOTCONN)
+               return rc;
 drop_connection:
        xprt_disconnect_done(xprt);
        return -ENOTCONN;       /* implies disconnect */
index 6fb73ff2618385ab8d7b5ef61739c17062b13456..db935ed3ac75aa6b3de8afc0707849a624341b8f 100644 (file)
@@ -1151,7 +1151,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
        if (rep) {
                rc = rpcrdma_ep_post_recv(ia, ep, rep);
                if (rc)
-                       goto out;
+                       return rc;
                req->rl_reply = NULL;
        }
 
@@ -1176,10 +1176,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 
        rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
        if (rc)
-               dprintk("RPC:       %s: ib_post_send returned %i\n", __func__,
-                       rc);
-out:
-       return rc;
+               goto out_postsend_err;
+       return 0;
+
+out_postsend_err:
+       pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
+       return -ENOTCONN;
 }
 
 /*
@@ -1204,11 +1206,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
                                   DMA_BIDIRECTIONAL);
 
        rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
-
        if (rc)
-               dprintk("RPC:       %s: ib_post_recv returned %i\n", __func__,
-                       rc);
-       return rc;
+               goto out_postrecv;
+       return 0;
+
+out_postrecv:
+       pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
+       return -ENOTCONN;
 }
 
 /**