nvme-rdma: support up to 4 segments of inline data
author Steve Wise <swise@opengridcomputing.com>
Wed, 20 Jun 2018 14:15:05 +0000 (07:15 -0700)
committer Christoph Hellwig <hch@lst.de>
Mon, 23 Jul 2018 07:35:15 +0000 (09:35 +0200)
Allow up to 4 segments of inline data for NVMF WRITE operations. This
reduces latency for small WRITEs by removing the need for the target to
issue a READ WR for IB, or a REG_MR + READ WR chain for iWarp.

Also cap the inline segments used based on the limitations of the
device.

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/host/rdma.c

index 518c5b09038c1e9041a89590a6d79101995650d4..363f73fe549c34528b5b3c5c08209ab4b5a54eb8 100644 (file)
 
 #define NVME_RDMA_MAX_SEGMENTS         256
 
-#define NVME_RDMA_MAX_INLINE_SEGMENTS  1
+#define NVME_RDMA_MAX_INLINE_SEGMENTS  4
 
 struct nvme_rdma_device {
        struct ib_device        *dev;
        struct ib_pd            *pd;
        struct kref             ref;
        struct list_head        entry;
+       unsigned int            num_inline_segments;
 };
 
 struct nvme_rdma_qe {
@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
        struct sockaddr_storage src_addr;
 
        struct nvme_ctrl        ctrl;
+       bool                    use_inline_data;
 };
 
 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
        /* +1 for drain */
        init_attr.cap.max_recv_wr = queue->queue_size + 1;
        init_attr.cap.max_recv_sge = 1;
-       init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
+       init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
        init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        init_attr.qp_type = IB_QPT_RC;
        init_attr.send_cq = queue->ib_cq;
@@ -374,6 +376,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
                goto out_free_pd;
        }
 
+       ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
+                                       ndev->dev->attrs.max_sge - 1);
        list_add(&ndev->entry, &device_list);
 out_unlock:
        mutex_unlock(&device_list_mutex);
@@ -925,6 +929,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        if (ret)
                goto requeue;
 
+       if (ctrl->ctrl.sgls & (1 << 20))
+               ctrl->use_inline_data = true;
+
        if (ctrl->ctrl.queue_count > 1) {
                ret = nvme_rdma_configure_io_queues(ctrl, false);
                if (ret)
@@ -1090,19 +1097,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
 }
 
 static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
-               struct nvme_rdma_request *req, struct nvme_command *c)
+               struct nvme_rdma_request *req, struct nvme_command *c,
+               int count)
 {
        struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+       struct scatterlist *sgl = req->sg_table.sgl;
+       struct ib_sge *sge = &req->sge[1];
+       u32 len = 0;
+       int i;
 
-       req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
-       req->sge[1].length = sg_dma_len(req->sg_table.sgl);
-       req->sge[1].lkey = queue->device->pd->local_dma_lkey;
+       for (i = 0; i < count; i++, sgl++, sge++) {
+               sge->addr = sg_dma_address(sgl);
+               sge->length = sg_dma_len(sgl);
+               sge->lkey = queue->device->pd->local_dma_lkey;
+               len += sge->length;
+       }
 
        sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
-       sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
+       sg->length = cpu_to_le32(len);
        sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
 
-       req->num_sge++;
+       req->num_sge += count;
        return 0;
 }
 
@@ -1195,15 +1210,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
                goto out_free_table;
        }
 
-       if (count == 1) {
+       if (count <= dev->num_inline_segments) {
                if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+                   queue->ctrl->use_inline_data &&
                    blk_rq_payload_bytes(rq) <=
                                nvme_rdma_inline_data_size(queue)) {
-                       ret = nvme_rdma_map_sg_inline(queue, req, c);
+                       ret = nvme_rdma_map_sg_inline(queue, req, c, count);
                        goto out;
                }
 
-               if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+               if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
                        ret = nvme_rdma_map_sg_single(queue, req, c);
                        goto out;
                }