RDMA/hns: Fix the illegal memory operation when cross page
author Wei Hu(Xavier) <xavier.huwei@huawei.com>
Mon, 28 May 2018 11:39:25 +0000 (19:39 +0800)
committer Doug Ledford <dledford@redhat.com>
Thu, 31 May 2018 00:45:03 +0000 (20:45 -0400)
This patch fixes a potential illegal memory access that occurs when the
extended sge buffer crosses a page boundary in the post send operation.
The bug causes the call trace below.

[ 3302.922107] Unable to handle kernel paging request at virtual address ffff00003b3a0004
[ 3302.930009] Mem abort info:
[ 3302.932790]   Exception class = DABT (current EL), IL = 32 bits
[ 3302.938695]   SET = 0, FnV = 0
[ 3302.941735]   EA = 0, S1PTW = 0
[ 3302.944863] Data abort info:
[ 3302.947729]   ISV = 0, ISS = 0x00000047
[ 3302.951551]   CM = 0, WnR = 1
[ 3302.954506] swapper pgtable: 4k pages, 48-bit VAs, pgd = ffff000009ea5000
[ 3302.961279] [ffff00003b3a0004] *pgd=00000023dfffe003, *pud=00000023dfffd003, *pmd=00000022dc84c003, *pte=0000000000000000
[ 3302.972224] Internal error: Oops: 96000047 [#1] SMP
[ 3302.999509] CPU: 9 PID: 19628 Comm: roce_test_main Tainted: G           OE   4.14.10 #1
[ 3303.007498] task: ffff80234df78000 task.stack: ffff00000f640000
[ 3303.013412] PC is at hns_roce_v2_post_send+0x690/0xe20 [hns_roce_pci]
[ 3303.019843] LR is at hns_roce_v2_post_send+0x658/0xe20 [hns_roce_pci]
[ 3303.026269] pc : [<ffff0000020694f8>] lr : [<ffff0000020694c0>] pstate: 804001c9
[ 3303.033649] sp : ffff00000f643870
[ 3303.036951] x29: ffff00000f643870 x28: ffff80232bfa9c00
[ 3303.042250] x27: ffff80234d909380 x26: ffff00003b37f0c0
[ 3303.047549] x25: 0000000000000000 x24: 0000000000000003
[ 3303.052848] x23: 0000000000000000 x22: 0000000000000000
[ 3303.058148] x21: 0000000000000101 x20: 0000000000000001
[ 3303.063447] x19: ffff80236163f800 x18: 0000000000000000
[ 3303.068746] x17: 0000ffff86b76fc8 x16: ffff000008301600
[ 3303.074045] x15: 000020a51c000000 x14: 3128726464615f65
[ 3303.079344] x13: 746f6d6572202c29 x12: 303035312879656b
[ 3303.084643] x11: 723a6f666e692072 x10: 573a6f666e693a5d
[ 3303.089943] x9 : 0000000000000004 x8 : ffff8023ce38b000
[ 3303.095242] x7 : ffff8023ce38b320 x6 : 0000000000000418
[ 3303.100541] x5 : ffff80232bfa9cc8 x4 : 0000000000000030
[ 3303.105839] x3 : 0000000000000100 x2 : 0000000000000200
[ 3303.111138] x1 : 0000000000000320 x0 : ffff00003b3a0000
[ 3303.116438] Process roce_test_main (pid: 19628, stack limit = 0xffff00000f640000)
[ 3303.123906] Call trace:
[ 3303.126339] Exception stack(0xffff00000f643730 to 0xffff00000f643870)
[ 3303.215790] [<ffff0000020694f8>] hns_roce_v2_post_send+0x690/0xe20 [hns_roce_pci]
[ 3303.223293] [<ffff0000021c3750>] rt_ktest_post_send+0x5d0/0x8b8 [rdma_test]
[ 3303.230261] [<ffff0000021b3234>] exec_send_cmd+0x664/0x1350 [rdma_test]
[ 3303.236881] [<ffff0000021b8b30>] rt_ktest_dispatch_cmd_3+0x1510/0x3790 [rdma_test]
[ 3303.244455] [<ffff0000021bae54>] rt_ktest_dispatch_cmd_2+0xa4/0x118 [rdma_test]
[ 3303.251770] [<ffff0000021bafec>] rt_ktest_dispatch_cmd+0x124/0xaa8 [rdma_test]
[ 3303.258997] [<ffff0000021bbc3c>] rt_ktest_dev_write+0x2cc/0x568 [rdma_test]
[ 3303.265947] [<ffff0000082ad688>] __vfs_write+0x60/0x18c
[ 3303.271158] [<ffff0000082ad998>] vfs_write+0xa8/0x198
[ 3303.276196] [<ffff0000082adc7c>] SyS_write+0x6c/0xd4
[ 3303.281147] Exception stack(0xffff00000f643ec0 to 0xffff00000f644000)
[ 3303.287573] 3ec0: 0000000000000003 0000fffffc85faa8 0000000000004e60 0000000000000000
[ 3303.295388] 3ee0: 0000000021fb2000 000000000000ffff eff0e3efe4e58080 0000fffffcc724fe
[ 3303.303204] 3f00: 0000000000000040 1999999999999999 0101010101010101 0000000000000038
[ 3303.311019] 3f20: 0000000000000005 ffffffffffffffff 0d73757461747320 ffffffffffffffff
[ 3303.318835] 3f40: 0000000000000000 0000000000459b00 0000fffffc85e360 000000000043d788
[ 3303.326650] 3f60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.334465] 3f80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.342281] 3fa0: 0000000000000000 0000fffffc85e570 0000000000438804 0000fffffc85e570
[ 3303.350096] 3fc0: 0000ffff8553f618 0000000080000000 0000000000000003 0000000000000040
[ 3303.357911] 3fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.365729] [<ffff000008083808>] __sys_trace_return+0x0/0x4
[ 3303.371288] Code: b94008e9 34000129 b9400ce2 110006b5 (b9000402)
[ 3303.377377] ---[ end trace fd5ab98b3325cf9a ]---

Reported-by: Jie Chen <chenjie103@huawei.com>
Reported-by: Xiping Zhang (Francis) <zhangxiping3@huawei.com>
Fixes: b1c158350968("RDMA/hns: Get rid of virt_to_page and vmap calls after dma_alloc_coherent")
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h

index 166f0469b5f5d2d780c0bbc4ba2d86922d944a19..0e8dad68910ab47a4952b384faa9c501bcf4c48b 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/etherdevice.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/types.h>
 #include <net/addrconf.h>
 #include <rdma/ib_umem.h>
 
@@ -52,6 +53,53 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
        dseg->len  = cpu_to_le32(sg->length);
 }
 
+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
+                          unsigned int *sge_ind)
+{
+       struct hns_roce_v2_wqe_data_seg *dseg;
+       struct ib_sge *sg;
+       int num_in_wqe = 0; /* sges held in the WQE (RC/UC only) */
+       int extend_sge_num; /* wr->num_sge minus num_in_wqe */
+       int fi_sge_num; /* data segs that fit before the page boundary */
+       int se_sge_num; /* data segs left over past that boundary */
+       int shift;
+       int i;
+
+       if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+               num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
+       extend_sge_num = wr->num_sge - num_in_wqe;
+       sg = wr->sg_list + num_in_wqe;
+       shift = qp->hr_buf.page_shift;
+
+       /*
+        * The extended sges may straddle a page boundary of the sge buffer.
+        * Count how many fit before the end of the current page (fi_sge_num);
+        * the remainder (se_sge_num) is written after a fresh address lookup.
+        */
+       dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
+       fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
+                     (uintptr_t)dseg) /
+                     sizeof(struct hns_roce_v2_wqe_data_seg);
+       if (extend_sge_num > fi_sge_num) {
+               se_sge_num = extend_sge_num - fi_sge_num;
+               for (i = 0; i < fi_sge_num; i++) {
+                       set_data_seg_v2(dseg++, sg + i);
+                       (*sge_ind)++;
+               }
+               dseg = get_send_extend_sge(qp,
+                                          (*sge_ind) & (qp->sge.sge_cnt - 1));
+               for (i = 0; i < se_sge_num; i++) {
+                       set_data_seg_v2(dseg++, sg + fi_sge_num + i);
+                       (*sge_ind)++;
+               }
+       } else {
+               for (i = 0; i < extend_sge_num; i++) {
+                       set_data_seg_v2(dseg++, sg + i);
+                       (*sge_ind)++;
+               }
+       }
+}
+
 static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
                             struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
                             void *wqe, unsigned int *sge_ind,
@@ -85,7 +133,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
                roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
                             1);
        } else {
-               if (wr->num_sge <= 2) {
+               if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
                        for (i = 0; i < wr->num_sge; i++) {
                                if (likely(wr->sg_list[i].length)) {
                                        set_data_seg_v2(dseg, wr->sg_list + i);
@@ -98,24 +146,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                     V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
                                     (*sge_ind) & (qp->sge.sge_cnt - 1));
 
-                       for (i = 0; i < 2; i++) {
+                       for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
                                if (likely(wr->sg_list[i].length)) {
                                        set_data_seg_v2(dseg, wr->sg_list + i);
                                        dseg++;
                                }
                        }
 
-                       dseg = get_send_extend_sge(qp,
-                                           (*sge_ind) & (qp->sge.sge_cnt - 1));
-
-                       for (i = 0; i < wr->num_sge - 2; i++) {
-                               if (likely(wr->sg_list[i + 2].length)) {
-                                       set_data_seg_v2(dseg,
-                                                       wr->sg_list + 2 + i);
-                                       dseg++;
-                                       (*sge_ind)++;
-                               }
-                       }
+                       set_extend_sge(qp, wr, sge_ind);
                }
 
                roce_set_field(rc_sq_wqe->byte_16,
@@ -319,13 +357,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
                               GID_LEN_V2);
 
-                       dseg = get_send_extend_sge(qp,
-                                           sge_ind & (qp->sge.sge_cnt - 1));
-                       for (i = 0; i < wr->num_sge; i++) {
-                               set_data_seg_v2(dseg + i, wr->sg_list + i);
-                               sge_ind++;
-                       }
-
+                       set_extend_sge(qp, wr, &sge_ind);
                        ind++;
                } else if (ibqp->qp_type == IB_QPT_RC) {
                        rc_sq_wqe = wqe;
index 2caeb4cdad5c2330283f3d4916d187c694af7d60..d47675f365c704f26aead3a510328ee39b273df8 100644 (file)
@@ -77,6 +77,7 @@
 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM         2
 #define HNS_ROCE_INVALID_LKEY                  0x100
 #define HNS_ROCE_CMQ_TX_TIMEOUT                        30000
+#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE       2
 
 #define HNS_ROCE_CONTEXT_HOP_NUM               1
 #define HNS_ROCE_MTT_HOP_NUM                   1